#Cleaning SEL data for citalopram study
#Catherine Hobbs, University of Bath c.hobbs@bath.ac.uk
#20.03.2020
#RStudio Version 1.2.1335 Mac OS

#Packages Required
library(readxl)
library(tidyverse)
library(zoo)
library(hutilscpp)

#Working directory: every relative path used later in this script resolves against this folder
setwd("/Volumes/files/RDSSD/Katie Studies/Citalopram/8. Data/")

#Reading the merged, anonymised raw SEL file (all participants in one workbook)
SEL_raw <- read_excel(
  path = "Raw/Cognitive Tasks/SEL/Merged/Citalopram_SEL_raw_trial_merged_anon.xlsx"
)

#Note: the code below is not needed for my purposes, as this information was removed when anonymising the data (to prevent identification). However, if you are using the SEL task, removing these columns may be useful to get a cleaner dataframe.
#removevars <- names(SEL_raw) %in% (c("ExperimentName", "Clock.Information", "DataFile.Basename", "Display.RefreshRate", "Display.RefreshRate.ValidTime", "ExperimentVersion", "Group", "RandomSeed", "RuntimeCapabilities", "RuntimeVersion", "RuntimeVersionExpected", "SessionStartDateTimeUtc", "StudioVersion", "baselineintrotext", "baselinetext", "Block", "reftext", "Running[Block]", "SelfOtherBlock", "SelfOtherBlock.Cycle", "SelfOtherBlock.Sample", "trialtext", "BaselineProc", "BaselineProc.Cycle", "BaselineProc.Sample", "baselinerating.ACC", "baselinerating.CRESP", "baselinerating.DurationError", "baselinerating.OnsetDelay", "baselinerating.OnsetTime", "baselinerating.OnsetToOnsetTime", "baselinerating.RESP", "baselinerating.RT", "baselinerating.RTTime", "finalrating.ACC", "finalrating.CRESP", "finalrating.DurationError", "finalrating.OnsetDelay", "finalrating.OnsetTime", "finalrating.OnsetToOnsetTime", "finalrating.RESP", "finalrating.RT", "finalrating.RTTime", "FinalRatingBlock", "FinalRatingBlock.Cycle", "FinalRatingBlock.Sample", "introtext", "persona", "resp", "rule[Trial]", "RuleBlock", 	"RuleBlock.Cycle", "RuleBlock.Sample", "RuleBlock1", "RuleBlock1.Cycle", "RuleBlock1.Sample", "RuleBlock2", "RuleBlock2.Cycle", "RuleBlock2.Sample", "Running[Trial]", "text", "text2", "allow1", "allow2", "bottomleft", "bottomright", "fw", "globalrating.ACC", "globalrating.CRESP", "globalrating.DurationError", "globalrating.OnsetDelay", "globalrating.OnsetTime", "globalrating.OnsetToOnsetTime", "globalrating.RESP", "globalrating.RT", "globalrating.RTTime", "incorrectresponse", "jitter1", "jitter2", "perceptionrating.ACC", "perceptionrating.CRESP", "perceptionrating.DurationError", "perceptionrating.OnsetDelay", "perceptionrating.OnsetTime", "perceptionrating.OnsetToOnsetTime", "perceptionrating.RESP", "perceptionrating.RT", "perceptionrating.RTTime", "Ratings20", "Ratings20.Cycle", "Ratings20.Sample", "Ratings40", "Ratings40.Cycle", "Ratings40.Sample", "Ratings60", 
"Ratings60.Cycle", "Ratings60.Sample", "Ratings80", "Ratings80.Cycle", "Ratings80.Sample", "rule[SubTrial]", "Running[SubTrial]", "Slide1.DurationError", "Slide1.OnsetDelay", "Slide1.OnsetTime", "Slide1.OnsetToOnsetTime", "Slide1.RTTime", "sw", "Trial20", "Trial20.Cycle", "Trial20.Sample", "Trial40", "Trial40.Cycle", "Trial40.Sample", "Trial60", "Trial60.Cycle", "Trial60.Sample", "Trial80", "Trial80.Cycle", "Trial80.Sample", "SessionDate", "SessionTime" ))
#SEL_raw <- SEL_raw[!removevars]

#Working copy of the raw data; SEL_raw itself is not modified below
SEL_clean <- SEL_raw

#The four rating columns hold text: a label prefix followed by the score
#(0 = Very Bad, 10 = Very Good). For each column the prefix is removed and the
#remainder coerced to integer; anything that is not a number ends up as NA.

#Base ratings "How do you currently feel about self / friend / stranger?"
#"Text1" is an invalid response: it is blanked out so that as.integer() returns NA
SEL_clean[["baseline"]] <- gsub(
  "Text1", "",
  gsub("baseline", "", SEL_clean[["baseline"]])
)
class(SEL_clean[["baseline"]])
SEL_clean[["baseline"]] <- as.integer(SEL_clean[["baseline"]])

#Final ratings "How do you now feel about self/friend/stranger"
#NOTE(review): the prefix removed here is "global" (the same as for globalrating) - confirm this matches the raw finalrating values
SEL_clean[["finalrating"]] <- gsub("global", "", SEL_clean[["finalrating"]])
class(SEL_clean[["finalrating"]])
SEL_clean[["finalrating"]] <- as.integer(SEL_clean[["finalrating"]])

#Global ratings "So, overall how much did I like self/friend/stranger"
SEL_clean[["globalrating"]] <- gsub("global", "", SEL_clean[["globalrating"]])
class(SEL_clean[["globalrating"]])
SEL_clean[["globalrating"]] <- as.integer(SEL_clean[["globalrating"]])

#Perception ratings "How does this make you feel about  self/friend/stranger"
SEL_clean[["perception"]] <- gsub("perception", "", SEL_clean[["perception"]])
class(SEL_clean[["perception"]])
SEL_clean[["perception"]] <- as.integer(SEL_clean[["perception"]])

#Generating Condition Variable (1 = Self, 2 = Friend, 3 = Stranger)
#The whole column is built in one step from a label -> code lookup, rather than
#by indexed assignment into a column that has not been created yet (which makes
#tibble warn about an unknown/uninitialised column). Block labels that are
#missing or not listed in the lookup are coded NA.
SEL_clean$Condition <- unname(
  c(Self = 1, Friend = 2, Stranger = 3)[as.character(SEL_clean$`Procedure[Block]`)]
)

#Cross-tabulating the new codes against the original labels as a check
table(SEL_clean$Condition, SEL_clean$`Procedure[Block]`)

#The label column is not used again once Condition exists
SEL_clean$`Procedure[Block]` <- NULL

#Generating Rule Variable (Proc80/Proc60/Proc40/Proc20 -> 80/60/40/20)
#The whole column is built in one step from a label -> value lookup, rather
#than by indexed assignment into a column that has not been created yet (which
#makes tibble warn about an unknown/uninitialised column). Procedure labels
#that are missing or not listed in the lookup are coded NA.
SEL_clean$Rule <- unname(
  c(Proc80 = 80, Proc60 = 60, Proc40 = 40, Proc20 = 20)[as.character(SEL_clean$`Procedure[Trial]`)]
)

#Cross-tabulating the original labels against the new values as a check
table(SEL_clean$`Procedure[Trial]`, SEL_clean$Rule)

#Creating an overall rule variable (1 = Positive, 2 = Negative)
#Rules below 50 (20 & 40) count as negative, the rest (60 & 80) as positive; NA stays NA
SEL_clean$Overall_rule <- ifelse(SEL_clean$Rule < 50, 2, 1)

#Filling in missing values for ratings to the rest of that grouping variable
#na.locf0() carries the last observed value forward; with fromLast = TRUE it
#carries the next observed value backward instead. Positions with nothing to
#carry from stay NA.
SEL_clean <- SEL_clean %>%
  ##Baseline (filled forward) and Final (filled backward): group = participant, Condition
  group_by(Subject, Condition) %>%
  mutate(
    baseline = na.locf0(baseline),
    finalrating = na.locf0(finalrating, fromLast = TRUE)
  ) %>%
  ##Global and Perception ratings (both filled backward): group = participant, Condition, Rule
  group_by(Subject, Condition, Rule) %>%
  mutate(
    globalrating = na.locf0(globalrating, fromLast = TRUE),
    perception = na.locf0(perception, fromLast = TRUE)
  ) %>%
  ungroup()

#Dropping rows without trial data (the ratings have already been copied onto the other rows of their group)
has_trial_data <- !is.na(SEL_clean[["feedback"]])
SEL_clean <- SEL_clean[has_trial_data, ]
rm(has_trial_data)

#Dropping variables not needed
SEL_clean[["Procedure[Trial]"]] <- NULL

#Renaming: removing the "Slide1." prefix from column names (later code refers to ACC and RT)
#fixed = TRUE makes the "." a literal dot; as a regular expression it would match any
#character, so a name such as "Slide1X" would have been altered as well
names(SEL_clean) <- gsub("Slide1.", "", names(SEL_clean), fixed = TRUE)

#Creating variables for total accuracy, total errors, errors to criterion, positive response rates
#All per-block quantities are computed within Subject x Condition x Rule
SEL_clean <- SEL_clean %>%
  #Errors are the complement of accuracy: 0 when ACC == 1, otherwise 1
  mutate(ERR = ifelse(ACC == 1, 0, 1)) %>%
  group_by(Subject, Condition, Rule) %>%
  mutate(
    #Totals and running totals of correct responses and errors
    ACC_tot = sum(ACC),
    ERR_tot = sum(ERR),
    ACC_cum = cumsum(ACC),
    ERR_cum = cumsum(ERR),
    #Counts the number correct until an error
    ACC_cum_reset = cumsum_reset(ACC > 0),
    #TRUE only at the first instance of 8 consecutive accurate responses
    crit_met = ACC_cum_reset == 8 & !duplicated(ACC_cum_reset == 8),
    #Cumulative errors at the trial where criterion was met, NA on every other trial
    ERR_to_crit = ifelse(crit_met, ERR_cum, NA)
  ) %>%
  #Spreading that single value over all trials of the rule so the rows can be collapsed later
  fill(ERR_to_crit, .direction = "downup") %>%
  mutate(
    #Number of trials flagged as meeting criterion (0 = criterion never met)
    crit_ever_met = sum(crit_met),
    #If criterion was never met, errors to criterion is the total number of errors
    ERR_to_crit = ifelse(crit_ever_met == 0, ERR_tot, ERR_to_crit),
    #Positive responses: accuracy under positive rules (60 & 80), errors under negative rules (20 & 40)
    POS_resp = ifelse(Rule < 50, ERR, ACC)
  ) %>%
  ungroup()

#Keeping a trial level dataframe
SEL_clean_trial <- SEL_clean

#Creating an aggregate level dataset: one row per Subject x Condition x Rule,
#each remaining column replaced by its mean over that block's trials
#(mean() is called without na.rm, so a block containing NA in a column yields NA for it)
#summarise_all(mean) replaces summarise_all(funs(mean(.))): funs() has been deprecated
#since dplyr 0.8.0; the result and the column names are the same
SEL_aggregate <- SEL_clean %>%
  select(Subject, baseline, finalrating, globalrating, perception, ACC, RT, Condition, Rule, ERR, ACC_tot, ERR_tot, crit_ever_met, ERR_to_crit, POS_resp, Overall_rule) %>%
  group_by(Subject, Condition, Rule) %>%
  summarise_all(mean) %>%
  rename(participant = Subject) %>% #Renaming to match with drug matching
  ungroup()

#Adding labels
#SEL_aggregate$crit_ever_met <- factor(SEL_aggregate$crit_ever_met,
                                  #levels = c(0,1),
                                  #labels = c("No", "Yes"))

SEL_aggregate$Condition <- factor(SEL_aggregate$Condition,
                              levels = c(1, 2, 3),
                              labels = c("Self","Friend", "Stranger"))

SEL_aggregate$Overall_rule <- factor(SEL_aggregate$Overall_rule ,
                                 levels = c(1,2),
                                 labels = c("Positive", "Negative"))

#Collapsing so one row per positive & negative condition
#Keeping the per-rule version before the rules are averaged together
SEL_aggregate_separate_rules <- SEL_aggregate

#Averaging the rules that share an Overall_rule within participant x Condition.
#summarise_all(mean) replaces summarise_all(funs(mean(.))): funs() has been deprecated
#since dplyr 0.8.0; the result and the column names are the same.
#NOTE(review): this step was previously written select(-Rule, crit_ever_met), which only
#removes Rule (crit_ever_met is kept, averaged across rules). That behaviour is preserved
#here; if the intention was to drop crit_ever_met as well, use select(-Rule, -crit_ever_met).
SEL_aggregate <- SEL_aggregate %>%
  group_by(participant, Condition, Overall_rule) %>%
  summarise_all(mean) %>%
  select(-Rule) %>%
  ungroup()

#Merging with drug matching (blinded)
drug_matching <- read_excel(
  path = "Raw/Group Allocation/Blinded_drug_matching_KH_format_for_R_anon.xlsx"
)
#merge() with its default settings keeps only participants present in both tables
SEL_aggregate <- merge(x = drug_matching, y = SEL_aggregate, by = "participant")

#Treating group allocation as a categorical variable
SEL_aggregate[["group"]] <- as.factor(SEL_aggregate[["group"]])

#Quickly checking data looks correct
str(SEL_aggregate)

#Reshaping for bias scores
#One row per participant x group x Condition, with errors to criterion spread into
#a Positive and a Negative column; bias is their difference (Positive - Negative)
SEL_bias <- SEL_aggregate %>%
  select(participant, group, ERR_to_crit, Condition, Overall_rule) %>%
  spread(key = Overall_rule, value = ERR_to_crit) %>%
  mutate(bias = Positive - Negative) %>%
  select(participant, group, Condition, bias)

#Saving Dataframes (paths are relative to the working directory set at the top of the script)
#save() writes R's own binary format; the file names carry no extension

##Trial level data
save(SEL_clean_trial, file = "Analysis/SEL/SEL_clean_trial")

##Aggregate data (R format and csv)
save(SEL_aggregate, file = "Analysis/SEL/SEL_clean_aggregate")
write.csv(x = SEL_aggregate, file = "Analysis/SEL/SEL_clean_aggregate.csv")

##Bias scores (R format and csv)
save(SEL_bias, file = "Analysis/SEL/SEL_bias")
write.csv(x = SEL_bias, file = "Analysis/SEL/SEL_bias.csv")

##Aggregate data before collapsing across rules
save(SEL_aggregate_separate_rules, file = "Analysis/SEL/SEL_aggregate_separate_rules")

