library(ggplot2)
library(reshape2)
library(plyr)
library(RColorBrewer)
library(tikzDevice)


# set up directories ------------------------------------------------------

# Input and output locations. Both strings end in "/" so that file names can
# be appended to them directly.
save_dir <- "~/Documents/projectsIMI/GW4 Coding/Survey/Datasets/"
plot_dir <- "~/Documents/projectsIMI/GW4 Coding/Survey/TestAnonData/"

# load datasets -----------------------------------------------------------

# Restores the objects stored in anonData.Rdata into the workspace. The rest
# of the script reads a data frame called anonData, presumably provided here.
load(paste0(save_dir, "anonData.Rdata"))

# define plot parameters --------------------------------------------------

# Numeric plot settings. h*/w* are presumably figure heights/widths (they are
# not referenced in the visible part of the script); relSize1/relSize2 are
# passed to rel() for axis/legend text and annotation text respectively.
h1 <- 2.5
w1 <- 3
h2 <- 3
w2 <- 5
relSize1 <- 0.5
relSize2 <- 2.5

# Colour vectors built from qualitative ColorBrewer palettes. The palettes
# are picked by row position (6 and 2, then 6 and 4) within the "qual" rows
# of brewer.pal.info, each taken at its maximum number of colours.
qual_col_pals <- brewer.pal.info[brewer.pal.info$category == "qual", ]
qual_col_pals <- qual_col_pals[c(6, 2), ]
col_vector <- unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))

qualColsCfamily <- brewer.pal.info[brewer.pal.info$category == "qual", ]
qualColsCfamily <- qualColsCfamily[c(6, 4), ]
col_Cfam <- unlist(mapply(brewer.pal, qualColsCfamily$maxcolors, rownames(qualColsCfamily)))
# Two-dimensional indexing: keeps the first two rows, i.e. presumably the
# first two colours of each of the two palettes.
col_Cfam <- col_Cfam[1:2, ]

# Single fill colour used by the one-colour bar charts. A previous
# rgb(0, 0.4470, 0.7410) assignment was overwritten immediately by this
# value and has been removed as dead code.
lineColour <- "#377eb8"

# Two grey levels (dark and light).
g1 <- 0.3
g2 <- 0.8
grey1 <- rgb(g1, g1, g1)
grey2 <- rgb(g2, g2, g2)

# Names of the ColorBrewer palettes used for qualitative / diverging fills.
paletteQual <- "Set1"
paletteDiv <- "RdBu"

# define text vectors -----------------------------------------------------

# Language labels: the second "."-separated field of the names of anonData
# columns 2 to 23 (NA when a name has no second field).
languages <- vapply(
  strsplit(colnames(anonData)[2:23], split = ".", fixed = TRUE),
  function(parts) parts[2],
  character(1),
  USE.NAMES = FALSE
)
# Manual overrides for three labels that need characters the column names do
# not carry through.
languages[5] <- "C++"
languages[6] <- "C\\#"
languages[22] <- "Visual Basic"

languagesOther <- c(languages, "Other")

# Tool labels: same extraction for anonData columns 487 to 511.
tools <- vapply(
  strsplit(colnames(anonData)[487:511], split = ".", fixed = TRUE),
  function(parts) parts[2],
  character(1),
  USE.NAMES = FALSE
)

# Display labels for 16 reasons for choosing a language. Further down the
# script these become the factor labels of the reasons-by-course tables, so
# the order here has to match the order of the 16 reason columns per language.
ReasonsLanguage = c("Availability / Cost to students",
                    "Department politics",
                    "Ease of installation",
                    "Easy to find appropriate texts",
                    "Extensions Libraries available",
                    "GUI interface available",
                    "Interpreted language",
                    "Marketable to students",
                    "Object oriented language",
                    "Online community and help available",
                    "OS/Machine limitations of department",
                    "Pedagogical benefits",
                    "Platform independence",
                    "Relevant to industry",
                    "Structure of degree",
                    "Don't know / other" )

# Display labels for 16 reasons for choosing a tool (not referenced in the
# visible part of the script).
ReasonsTool = c("Associated support material",
                "Availability / Cost to students",
                "Cross platform",
                "Ease of installation",
                "Graphical User Interface",
                "Open source",
                "OS/Machine limitations of department",
                "Packaged with the language",
                "Pedagogical benefits",
                "Plugins available",
                "Relevant to industry",
                "Student motivation",
                "Supports OO paradigm",
                "Uncomplicated Ease of use",
                "Visual cues debugger",
                "Other")

# Seven display labels; "\n" puts a line break inside a label. Not referenced
# in the visible part of the script.
steps = c("None",                                                      
          "Notice unexpected elements\nin the code",                    
          "Notice unlikely similarities\nbetween programs",   
          "Use a software-similarity\ndetection system",                
          "Interview some students/groups\nselected at random",         
          "Interview some students/groups\nwhen suspicious",
          "Interview all students/groups")

# Fourteen display labels for resources; "\n" puts a line break inside a
# label. Not referenced in the visible part of the script.
resources = c("Assignment hints",                                        
              "Cheat-sheets in exams",        
              "Discussion boards/forums",                                
              "Lecture slides/notes\n-- publisher",
              "Lecture slides/notes\n-- lecturer",          
              "Mailing list",                                            
              "Open book examinations",                                  
              "Online examinations",                                     
              "Online tutorials",                                       
              "Recorded lectures",                                       
              "Self-assessment questions",                               
              "Textbook is specified",                                   
              "Topic summaries",                                         
              "Worked examples of programming\nproblem solutions")

# Fifteen display labels for course aims. Not referenced in the visible part
# of the script.
aimLabels = c("Algorithmic thinking",
              "Introductory experience",
              "Fundamental concepts",
              "Learn problem-solving",
              "Syntax/writing basic code",
              "Student motivation/engagement",
              "Confidence building",
              "Debugging/testing",
              "Fundamental Constructs",
              "Preparation for further study",
              "Fundamental OO concepts",
              "Read code",
              "Career motivation/preparation",
              "Software engineering",
              "Multiple paradigms")

# get basic stats ---------------------------------------------------------

# Mean and median of the NumberStudents and NumberLanguages columns, with
# missing values ignored.
meanNumStudents <- mean(anonData$NumberStudents, na.rm = TRUE)
medianNumStudents <- median(anonData$NumberStudents, na.rm = TRUE)

meanNumLanguages <- mean(anonData$NumberLanguages, na.rm = TRUE)
medianNumLanguages <- median(anonData$NumberLanguages, na.rm = TRUE)
# NumberLanguages values that are greater than zero.
numLanguagesOverZero <- anonData$NumberLanguages[anonData$NumberLanguages > 0]

# Row sums over the language columns (2:23), and the number of rows whose
# sum is positive.
languagesPerRespondent <- rowSums(anonData[, 2:23], na.rm = TRUE)
numRespondentsLang <- sum(languagesPerRespondent > 0, na.rm = TRUE)


# plot distribution of language numbers -----------------------------------

# Bar chart of numLanguagesOverZero written to numOfLangsDist.pdf, with an
# "n = <numRespondentsLang>" label placed at 90% of the panel's upper x and
# y range.
pdf(file = paste0(plot_dir, "numOfLangsDist.pdf"))
plot1 <- ggplot(data = NULL, aes(x = numLanguagesOverZero)) +
  theme_bw() +
  geom_bar(stat = "count", fill = lineColour, width = 0.4) +
  coord_cartesian(xlim = c(0.5, 4.5)) +
  xlab("\nNumber of Languages") +
  ylab("Number of Courses\n") +
  theme(
    axis.title = element_text(size = rel(relSize1)),
    axis.text = element_text(size = rel(relSize1))
  )
# NOTE(review): the $panel$ranges path depends on the internal layout of
# ggplot_build() output -- confirm it exists in the installed ggplot2 version.
plot1_xlim <- ggplot_build(plot1)$panel$ranges[[1]]$x.range[2]
plot1_ylim <- ggplot_build(plot1)$panel$ranges[[1]]$y.range[2]
plot1 <- plot1 +
  annotate(
    "text",
    x = 0.9 * plot1_xlim,
    y = 0.9 * plot1_ylim,
    label = paste0("n = ", numRespondentsLang),
    size = rel(relSize2)
  )
print(plot1)
dev.off()


# get popularity of languages ---------------------------------------------

# Column totals of the language columns and their grand total.
countsPerLang <- colSums(anonData[, 2:23], na.rm = TRUE)
numAllLangs <- sum(countsPerLang)

# Student-weighted totals per language: each row's NumberStudents multiplies
# its language columns. Rows with NumberStudents equal to 3200 are left out.
numStudentsPerLang <- colSums(
  anonData$NumberStudents[anonData$NumberStudents != 3200] *
    anonData[anonData$NumberStudents != 3200, 2:23],
  na.rm = TRUE
)

# NOTE(review): countsPerLang is a plain vector, so this call leaves it
# unchanged; names(countsPerLang) was presumably intended. The data frame
# below is built with row.names = NULL, so the names do not reach it anyway.
colnames(countsPerLang) <- NULL
languageData <- data.frame(
  language = languages,
  numCourses = countsPerLang,
  percentByCourse = countsPerLang / numAllLangs * 100,
  numStudents = numStudentsPerLang,
  percentByStudents = numStudentsPerLang / sum(numStudentsPerLang) * 100,
  row.names = NULL
)

# Average of the two percentages; rows and factor levels are then put in
# decreasing order of that average.
languageData$averagePopularity <- rowMeans(
  cbind(languageData$percentByCourse, languageData$percentByStudents)
)
languageData <- languageData[order(languageData$averagePopularity, decreasing = TRUE), ]
languageData$language <- reorder(
  languageData$language,
  order(languageData$averagePopularity, decreasing = TRUE)
)

# Long format of the languages with positive averagePopularity: the measured
# variables left after the id columns are percentByCourse and
# percentByStudents.
languageDataMelted = melt(languageData[languageData$averagePopularity>0,], id.vars=c("language", "numCourses", "numStudents", "averagePopularity"))
# Regroup by language and variable, carrying the value column through.
groupedLanguageScores = ddply(languageDataMelted, c("language", "variable"), summarise,value=value)
# Make room for an aggregate "C Family" entry: add the new factor levels
# first, then append two rows (one per variable) at the end of the table.
levels(groupedLanguageScores$language) = c(levels(groupedLanguageScores$language),"C Family")
levels(groupedLanguageScores$variable) = c(levels(groupedLanguageScores$variable),"percentByCourseC","percentByStudentsC")
groupedLanguageScores[(nrow(groupedLanguageScores)+1):(nrow(groupedLanguageScores)+2),1] = "C Family"
groupedLanguageScores[(nrow(groupedLanguageScores)-1):(nrow(groupedLanguageScores)),2] = c("percentByCourseC","percentByStudentsC")
# The two new values are sums over hard-coded row positions.
# NOTE(review): rows 5/7/13 and 6/8/14 are presumably the C-family languages'
# percentByCourse and percentByStudents rows -- re-check if the data changes.
groupedLanguageScores[(nrow(groupedLanguageScores)-1):(nrow(groupedLanguageScores)),3] = 
  c(sum(groupedLanguageScores[c(5,7,13),3]),
    sum(groupedLanguageScores[c(6,8,14),3]))

# Rebuild the language factor with levels taken from every second row
# (rows 1, 3, ..., 27 -- the row count is hard-coded).
groupedLanguageScores$language = factor(as.character(groupedLanguageScores$language), levels = groupedLanguageScores$language[seq(1,27,2)])
# Order the levels by decreasing value of the rows whose variable matches
# "percentByCourse"; the grep pattern also matches "percentByCourseC".
groupedLanguageScores$language = factor(groupedLanguageScores$language,levels = levels(groupedLanguageScores$language)[order(groupedLanguageScores$value[grep("percentByCourse",groupedLanguageScores$variable)],decreasing = T)])


# plot language popularity ------------------------------------------------

# Bar chart of percentByCourse per language (languages with positive
# averagePopularity only), annotated with "n = <numAllLangs>".
# NOTE(review): the $panel$ranges path depends on the internal layout of
# ggplot_build() output -- confirm it exists in the installed ggplot2 version.
pdf(file = paste0(plot_dir, "langPercentByCourse.pdf"))
plot2 <- ggplot(
  data = languageData[languageData$averagePopularity > 0, ],
  aes(x = factor(language), y = percentByCourse)
) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  geom_bar(stat = "identity", fill = lineColour) +
  coord_cartesian() +
  xlab(" ") +
  ylab("% of Languages by Course\n") +
  theme(
    axis.title = element_text(size = rel(relSize1)),
    axis.text = element_text(size = rel(relSize1))
  )
plot2_xlim <- ggplot_build(plot2)$panel$ranges[[1]]$x.range[2]
plot2_ylim <- ggplot_build(plot2)$panel$ranges[[1]]$y.range[2]
plot2 <- plot2 +
  annotate(
    "text",
    x = 0.9 * plot2_xlim,
    y = 0.9 * plot2_ylim,
    label = paste0("n = ", numAllLangs),
    size = rel(relSize2)
  )
print(plot2)
dev.off()

# Same chart for percentByStudents, annotated with the total of
# numStudentsPerLang; the label sits slightly further left (85% of x range).
pdf(file = paste0(plot_dir, "langPercentByStudents.pdf"))
plot3 <- ggplot(
  data = languageData[languageData$averagePopularity > 0, ],
  aes(x = factor(language), y = percentByStudents)
) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  geom_bar(stat = "identity", fill = lineColour) +
  coord_cartesian() +
  xlab(" ") +
  ylab("% of Languages by Students\n") +
  theme(
    axis.title = element_text(size = rel(relSize1)),
    axis.text = element_text(size = rel(relSize1))
  )
plot3_xlim <- ggplot_build(plot3)$panel$ranges[[1]]$x.range[2]
plot3_ylim <- ggplot_build(plot3)$panel$ranges[[1]]$y.range[2]
plot3 <- plot3 +
  annotate(
    "text",
    x = 0.85 * plot3_xlim,
    y = 0.9 * plot3_ylim,
    label = paste0("n = ", sum(numStudentsPerLang)),
    size = rel(relSize2)
  )
print(plot3)
dev.off()


# with C family
# Dodged bars of groupedLanguageScores (both percentages per language, plus
# the appended "C Family" rows). The legend lists only the two base
# variables; fill colours come from col_Cfam.
pdf(file = paste0(plot_dir, "langPercentCompare.pdf"))
plot4 <- ggplot(data = groupedLanguageScores, aes(x = language, y = value)) +
  theme_bw() +
  geom_bar(stat = "identity", position = "dodge", aes(fill = variable)) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.title = element_blank(),
    legend.position = c(0.75, 0.8),
    legend.key = element_blank(),
    legend.text = element_text(size = rel(relSize1)),
    legend.key.size = unit(0.3, "cm"),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  ) +
  scale_fill_manual(
    breaks = c("percentByCourse", "percentByStudents"),
    values = col_Cfam,
    labels = c("Language instances", "Students")
  ) +
  theme(
    axis.title = element_text(size = rel(relSize1)),
    axis.text = element_text(size = rel(relSize1)),
    plot.margin = grid::unit(c(1, 0, 0, 2), "mm")
  )
print(plot4)
dev.off()

# group paradigm by language ----------------------------------------------

# Build one (language, paradigm) pair for every non-missing language column
# of each row that has at least one positive language entry and a
# non-missing Paradigm.
allLangsP <- NULL
allParadigms <- NULL
for (i in seq_len(nrow(anonData))) {
  if (sum(anonData[i, 2:24] > 0, na.rm = TRUE) > 0 && !is.na(anonData$Paradigm[i])) {
    # The original wrapped this assignment in a loop over languagesOther that
    # recomputed the same value on every pass; one evaluation is enough.
    tmp <- languagesOther[which(!is.na(anonData[i, 2:24]))]
    allLangsP <- c(allLangsP, tmp)
    allParadigms <- c(allParadigms, rep(anonData$Paradigm[i], length(tmp)))
  }
}
languageParadigmDataRaw <- data.frame(language = allLangsP, paradigm = allParadigms)
# Cross-tabulate and return to long format: columns language, paradigm, value.
languageParadigmData <- melt(table(languageParadigmDataRaw), id.vars = "language")

# Share of each cell within its paradigm and within its language, in percent.
# ave() returns the group total aligned with every row, replacing the
# row-by-row loop.
languageParadigmData$percentByParadigm <- languageParadigmData$value /
  ave(languageParadigmData$value, languageParadigmData$paradigm, FUN = sum) * 100
languageParadigmData$percentByLang <- languageParadigmData$value /
  ave(languageParadigmData$value, languageParadigmData$language, FUN = sum) * 100

# Totals per language and per paradigm, used for the "(n = ...)" labels.
numPerLang2 <- aggregate(languageParadigmData$value, by = list(language = languageParadigmData$language), FUN = sum)
numPerParadigm2 <- aggregate(languageParadigmData$value, by = list(paradigm = languageParadigmData$paradigm), FUN = sum)

paradigms <- c("Functional", "Object-Oriented", "Procedural")
# Paradigm name and count on two lines, e.g. "Functional\n(n = 3)".
paradigmLabels2 <- paste(paradigms, paste0("(n = ", numPerParadigm2$x, ")"), sep = "\n")
# Language level and count on one line, built in reverse level order.
langLabels2 <- paste(
  levels(languageParadigmData$language)[length(numPerLang2$x):1],
  paste0("(n = ", numPerLang2$x[length(numPerLang2$x):1], ")"),
  sep = " "
)

# plot lang by paradigm ---------------------------------------------------

# Stacked bars: one bar per paradigm, filled by language, heights taken from
# percentByParadigm. X labels carry the per-paradigm counts.
pdf(file = paste0(plot_dir, "langByParadigmPercent.pdf"))
plot5 <- ggplot(
  data = languageParadigmData,
  aes(x = factor(paradigm), y = percentByParadigm)
) +
  theme_bw() +
  geom_bar(stat = "identity", aes(fill = language), width = 0.8) +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.5, vjust = 0.5),
    legend.title = element_blank(),
    legend.key = element_blank(),
    legend.text = element_text(size = rel(relSize1)),
    axis.title.x = element_blank(),
    legend.key.size = unit(0.3, "cm")
  ) +
  ylab("Percentage") +
  scale_fill_manual(
    values = col_vector[1:(length(unique(languageParadigmData$language)))]
  ) +
  theme(
    axis.title = element_text(size = rel(relSize1)),
    axis.text = element_text(size = rel(relSize1)),
    plot.margin = grid::unit(c(0, 0, 1, 0), "mm")
  ) +
  scale_x_discrete(labels = paradigmLabels2)
print(plot5)
dev.off()

# plot paradigm by language -----------------------------------------------

# Stacked bars the other way round: one bar per language, filled by paradigm,
# heights taken from percentByLang. langLabels2 is reversed again here.
pdf(file = paste0(plot_dir, "ParadigmByLangPercent.pdf"))
plot6 <- ggplot(
  data = languageParadigmData,
  aes(x = factor(language), y = percentByLang)
) +
  theme_bw() +
  geom_bar(stat = "identity", aes(fill = paradigm), width = 0.8) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    legend.title = element_blank(),
    legend.key = element_blank(),
    legend.text = element_text(size = rel(relSize1 * 0.8)),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    legend.key.size = unit(0.5, "cm")
  ) +
  scale_fill_manual(
    values = col_vector[1:length(unique(languageParadigmData$paradigm))],
    labels = c("Functional", "Object-\nOriented", "Procedural")
  ) +
  theme(
    axis.title = element_text(size = rel(relSize1)),
    axis.text = element_text(size = rel(relSize1)),
    plot.margin = grid::unit(c(0, 0, 0, 2), "mm")
  ) +
  scale_x_discrete(labels = langLabels2[length(langLabels2):1])
print(plot6)
dev.off()

# reasons for a language --------------------------------------------------

#' Collect the answered reason flags for a set of languages.
#'
#' Each language owns a run of `width` consecutive columns in `data`, starting
#' at `firstCol` for language 1. For every row of `data` and every requested
#' language, the run is turned into TRUE/FALSE flags (TRUE = not NA) and kept
#' as one output row when at least one flag is TRUE.
#'
#' @param data Data frame holding the reason columns.
#' @param langIndices Integer vector of language positions to scan.
#' @param firstCol Column index of the first reason of language 1.
#' @param width Number of reason columns per language.
#' @return Data frame with logical columns R1..R<width>, one row per
#'   (row, language) pair that has an answer, in row-major order. When nothing
#'   is found it holds a single all-NA row.
collectReasonRows <- function(data, langIndices, firstCol = 49, width = 16) {
  found <- as.data.frame(
    matrix(NA, nrow = 1, ncol = width,
           dimnames = list(NULL, paste0("R", seq_len(width))))
  )
  rowNo <- 1
  for (i in seq_len(nrow(data))) {
    for (j in langIndices) {
      tmp <- data[i, (firstCol + (j - 1) * width):(firstCol - 1 + j * width)]
      if (sum(!is.na(tmp)) > 0) {
        found[rowNo, seq_len(width)] <- !is.na(tmp)
        rowNo <- rowNo + 1
      }
    }
  }
  found
}

# All 23 language slots.
reasonsLangChosenByCourse <- collectReasonRows(anonData, 1:23)

# Count per reason, the ascending order of those counts (reused as the factor
# level order for all three tables below), and the percentage of collected
# rows that gave each reason.
reasonsLangPerCourseTotal <- colSums(reasonsLangChosenByCourse, na.rm = TRUE)
orderTotReasons <- order(reasonsLangPerCourseTotal, decreasing = FALSE)
reasonsByCourse <- data.frame(
  reason = factor(ReasonsLanguage, levels = ReasonsLanguage[orderTotReasons]),
  percentage = round(reasonsLangPerCourseTotal / nrow(reasonsLangChosenByCourse) * 100, digits = 2)
)


# reasons for java --------------------------------------------------------

# Language slot 11 only.
reasonsLangChosenByCourseJava <- collectReasonRows(anonData, 11)

reasonsLangPerCourseTotalJava <- colSums(reasonsLangChosenByCourseJava, na.rm = TRUE)
reasonsByCourseJava <- data.frame(
  reason = factor(ReasonsLanguage, levels = ReasonsLanguage[orderTotReasons]),
  percentage = round(reasonsLangPerCourseTotalJava / nrow(reasonsLangChosenByCourseJava) * 100, digits = 2)
)


# reasons for python ------------------------------------------------------

# Language slot 20 only.
reasonsLangChosenByCoursePython <- collectReasonRows(anonData, 20)

reasonsLangPerCourseTotalPython <- colSums(reasonsLangChosenByCoursePython, na.rm = TRUE)
reasonsByCoursePython <- data.frame(
  reason = factor(ReasonsLanguage, levels = ReasonsLanguage[orderTotReasons]),
  percentage = round(reasonsLangPerCourseTotalPython / nrow(reasonsLangChosenByCoursePython) * 100, digits = 2)
)


# compare reasons for all, java, python -----------------------------------

# Stack the three tables and tag each 16-row chunk with its source.
reasonsByCourseCompare <- rbind(reasonsByCourse, reasonsByCourseJava, reasonsByCoursePython)
reasonsByCourseCompare$language <- c(rep("All", 16), rep("Java", 16), rep("Python", 16))

# plot reasons for language -----------------------------------------------

# Horizontal dodged bars: percentage per reason, one bar colour per entry of
# the language column ("All", "Java", "Python").
pdf(file = paste0(plot_dir, "reasonsByCourseCompare.pdf"))
plot7 <- ggplot(data = reasonsByCourseCompare, aes(x = reason, y = percentage)) +
  theme_bw() +
  geom_bar(stat = "identity", position = "dodge", aes(fill = language), width = .6) +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.5),
    legend.title = element_blank(),
    legend.position = c(0.8, 0.1),
    legend.key = element_blank(),
    legend.text = element_text(size = rel(relSize1)),
    legend.key.size = unit(0.3, "cm"),
    axis.title.y = element_blank(),
    axis.title.x = element_blank()
  ) +
  scale_fill_brewer(palette = paletteQual) +
  coord_flip() +
  theme(
    axis.title = element_text(size = rel(relSize1)),
    axis.text = element_text(size = rel(relSize1)),
    plot.margin = grid::unit(c(0, 0, 1, 0), "mm")
  )
print(plot7)
dev.off()

# plot paradigm -----------------------------------------------------------

# Convert Paradigm to a factor and move its first level to the end
# (level order 2, 3, 1).
anonData$Paradigm <- as.factor(anonData$Paradigm)
anonData$Paradigm <- factor(anonData$Paradigm, levels(anonData$Paradigm)[c(2, 3, 1)])

# Count of rows per paradigm, rows with a missing Paradigm excluded.
pdf(file = paste0(plot_dir, "paradigm.pdf"))
plot8 <- ggplot(
  data = anonData[!is.na(anonData$Paradigm), ],
  aes(x = factor(Paradigm))
) +
  theme_bw() +
  geom_bar(stat = "count", fill = lineColour, width = 0.4) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  xlab("") +
  ylab("Number of Respondents") +
  theme(
    axis.title = element_text(size = rel(relSize1)),
    axis.text = element_text(size = rel(relSize1))
  )
print(plot8)
dev.off()


# plot timing of languages ------------------------------------------------

# Recode the three timing answers in columns 26:47 to 1, 2 and 3 in place.
for (i in 26:47) {
  anonData[
    which(anonData[, i] == "Language is used for the whole of the first programming course"),
    i
  ] <- 1
  anonData[
    which(anonData[, i] == "Language is used for the first part of the first programming course, followed by another language"),
    i
  ] <- 2
  anonData[
    which(anonData[, i] == "Language is used after another language in the first programming course"),
    i
  ] <- 3
}

# Per language: how many rows carry each code.
Timing.Languages <- data.frame(
  language = languages,
  whole = colSums(anonData[, 26:47] == 1, na.rm = TRUE),
  first = colSums(anonData[, 26:47] == 2, na.rm = TRUE),
  after = colSums(anonData[, 26:47] == 3, na.rm = TRUE),
  row.names = NULL
)
# Long format, restricted to languages with at least one answer.
Timing.Languages.Melted <- melt(
  Timing.Languages[rowSums(Timing.Languages[, c(2:4)]) > 0, ],
  id.vars = c("language")
)
# Reverse the level order of language (the axis is flipped below).
Timing.Languages.Melted$language <- factor(
  Timing.Languages.Melted$language,
  levels(Timing.Languages.Melted$language)[c(length(levels(Timing.Languages.Melted$language)):1)]
)

pdf(file = paste0(plot_dir, "timingLanguages.pdf"))
plot9 <- ggplot(data = Timing.Languages.Melted, aes(x = language, y = value)) +
  theme_bw() +
  geom_bar(stat = "identity", position = "dodge", aes(fill = variable)) +
  theme(
    axis.text.x = element_text(angle = 0, hjust = .5),
    legend.title = element_blank(),
    legend.key = element_blank(),
    legend.text = element_text(size = rel(relSize1))
  ) +
  scale_fill_brewer(labels = c("Whole", "First", "After"), palette = "Set1") +
  coord_flip() +
  xlab("") +
  ylab("") +
  theme(
    axis.title = element_text(size = rel(relSize1)),
    axis.text = element_text(size = rel(relSize1))
  )
print(plot9)
dev.off()

# language difficulty -----------------------------------------------------

# Recode the seven difficulty answers in columns 440:462 to 1..7 in place
# (1 = "Extremely Easy", 7 = "Extremely Difficult").
for (i in 440:462) {
  anonData[which(anonData[, i] == "Extremely Easy"), i] <- 1
  anonData[which(anonData[, i] == "Moderately Easy"), i] <- 2
  anonData[which(anonData[, i] == "Slightly Easy"), i] <- 3
  anonData[which(anonData[, i] == "Neither Easy nor Difficult"), i] <- 4
  anonData[which(anonData[, i] == "Slightly Difficult"), i] <- 5
  anonData[which(anonData[, i] == "Moderately Difficult"), i] <- 6
  anonData[which(anonData[, i] == "Extremely Difficult"), i] <- 7
}

# Per language (columns 440:461): number of rows carrying each code.
Difficulty.Languages <- data.frame(
  language = languages,
  ExEasy = colSums(anonData[, 440:461] == 1, na.rm = TRUE),
  ModEasy = colSums(anonData[, 440:461] == 2, na.rm = TRUE),
  SliEasy = colSums(anonData[, 440:461] == 3, na.rm = TRUE),
  NotEasyNorDiff = colSums(anonData[, 440:461] == 4, na.rm = TRUE),
  SliDiff = colSums(anonData[, 440:461] == 5, na.rm = TRUE),
  ModDiff = colSums(anonData[, 440:461] == 6, na.rm = TRUE),
  ExDiff = colSums(anonData[, 440:461] == 7, na.rm = TRUE),
  row.names = NULL
)


# Median code and number of answers per language. The counts in columns 2:8
# are expanded back into individual scores; a median is only stored when
# there is more than one answer.
Difficulty.Languages$median <- rep(NA, nrow(Difficulty.Languages))
Difficulty.Languages$numResponses <- rep(NA, nrow(Difficulty.Languages))
for (i in 1:nrow(Difficulty.Languages)) {
  allScores <- rep(1:7, times = unlist(Difficulty.Languages[i, 2:8]))
  Difficulty.Languages$numResponses[i] <- length(allScores)
  if (length(allScores) > 1) {
    Difficulty.Languages$median[i] <- median(allScores)
  }
}

# Long format for languages with at least one answer; every non-id column
# (the seven counts, median and numResponses) becomes a variable.
Difficulty.Languages.Melted <- melt(
  Difficulty.Languages[rowSums(Difficulty.Languages[, c(2:8)]) > 0, ],
  id.vars = c("language")
)
# Reverse the level order of language.
Difficulty.Languages.Melted$language <- factor(
  Difficulty.Languages.Melted$language,
  levels(Difficulty.Languages.Melted$language)[c(length(levels(Difficulty.Languages.Melted$language)):1)]
)

# Median difficulty per language, ordered by median, with the number of
# answers printed inside each bar.
pdf(file = paste0(plot_dir, "LangMedianDiff.pdf"))
plot10 <- ggplot(
  data = Difficulty.Languages[!is.na(Difficulty.Languages$median), ],
  aes(x = factor(language, levels = language[order(median)]), y = median)
) +
  theme_bw() +
  geom_bar(stat = "identity", fill = lineColour) +
  geom_text(
    aes(label = paste("n = ", numResponses, sep = "")),
    stat = "identity", vjust = 2, size = 1.5, colour = "white"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  xlab("") +
  ylab("Median Difficulty\n") +
  theme(
    axis.title = element_text(size = rel(relSize1)),
    axis.text = element_text(size = rel(relSize1))
  )
print(plot10)
dev.off()

# Distribution of the seven difficulty ratings per language.
# The melted table also holds "median" and "numResponses" rows after the
# seven rating variables. The original selected rows 1:77, which only lines
# up with the ratings when exactly 11 languages have answers; selecting the
# rating variables by name gives the same rows in that case and stays correct
# for any other number of languages.
pdf(file = paste0(plot_dir, "difficultyLanguages.pdf"))
plot11 <- ggplot(
  data = Difficulty.Languages.Melted[
    Difficulty.Languages.Melted$variable %in% names(Difficulty.Languages)[2:8],
  ],
  aes(x = language, y = value)
) +
  theme_bw() +
  geom_bar(stat = "identity", position = "dodge", aes(fill = variable)) +
  theme(
    axis.text.x = element_text(angle = 0, hjust = .5),
    legend.title = element_blank(),
    legend.key = element_blank(),
    legend.text = element_text(size = rel(relSize1))
  ) +
  xlab("") +
  ylab("") +
  scale_fill_brewer(
    labels = c("Extremely Easy", "Moderately Easy", "Slightly Easy",
               "Neither Easy nor Difficult", "Slightly Difficult", "Moderately Difficult",
               "Extremely Difficult"),
    palette = "RdBu"
  ) +
  coord_flip() +
  theme(
    axis.title = element_text(size = rel(relSize1)),
    axis.text = element_text(size = rel(relSize1))
  )
print(plot11)
dev.off()


# language utility --------------------------------------------------------

# Recode the seven usefulness answers in columns 463:485 to 1..7 in place
# (1 = "Extremely Useful", 7 = "Extremely Useless").
for (i in 463:485) {
  anonData[which(anonData[, i] == "Extremely Useful"), i] <- 1
  anonData[which(anonData[, i] == "Moderately Useful"), i] <- 2
  anonData[which(anonData[, i] == "Slightly Useful"), i] <- 3
  anonData[which(anonData[, i] == "Neither Useful nor Useless"), i] <- 4
  anonData[which(anonData[, i] == "Slightly Useless"), i] <- 5
  anonData[which(anonData[, i] == "Moderately Useless"), i] <- 6
  anonData[which(anonData[, i] == "Extremely Useless"), i] <- 7
}

# Per language (columns 463:484): number of rows carrying each code.
Use.Languages <- data.frame(
  language = languages,
  ExUseful = colSums(anonData[, 463:484] == 1, na.rm = TRUE),
  ModUseful = colSums(anonData[, 463:484] == 2, na.rm = TRUE),
  SliUseful = colSums(anonData[, 463:484] == 3, na.rm = TRUE),
  NotUsefulNorUseless = colSums(anonData[, 463:484] == 4, na.rm = TRUE),
  SliUseless = colSums(anonData[, 463:484] == 5, na.rm = TRUE),
  ModUseless = colSums(anonData[, 463:484] == 6, na.rm = TRUE),
  ExUseless = colSums(anonData[, 463:484] == 7, na.rm = TRUE),
  row.names = NULL
)


# Median code and number of answers per language. The placeholder columns
# are sized from Use.Languages itself; the original took the row count from
# Difficulty.Languages, a copy-paste slip that only worked because both
# tables have one row per entry of languages.
Use.Languages$median <- rep(NA, nrow(Use.Languages))
Use.Languages$numResponses <- rep(NA, nrow(Use.Languages))
for (i in seq_len(nrow(Use.Languages))) {
  # Expand the counts in columns 2:8 back into individual scores.
  allScores <- NULL
  for (j in 1:7) {
    allScores <- c(allScores, rep(j, Use.Languages[i, j + 1]))
  }
  Use.Languages$numResponses[i] <- length(allScores)
  # A median is only stored when there is more than one answer.
  if (length(allScores) > 1) {
    Use.Languages$median[i] <- median(allScores)
  }
}

# Long format for languages with at least one answer; every non-id column
# (the seven counts, median and numResponses) becomes a variable.
Use.Languages.Melted <- melt(
  Use.Languages[rowSums(Use.Languages[, c(2:8)]) > 0, ],
  id.vars = c("language")
)
# Reverse the level order of language.
Use.Languages.Melted$language <- factor(
  Use.Languages.Melted$language,
  levels(Use.Languages.Melted$language)[c(length(levels(Use.Languages.Melted$language)):1)]
)

# Median usefulness per language, ordered by median, with the number of
# answers printed inside each bar.
pdf(file = paste0(plot_dir, "LangMedianUse.pdf"))
plot12 <- ggplot(
  data = Use.Languages[!is.na(Use.Languages$median), ],
  aes(x = factor(language, levels = language[order(median)]), y = median)
) +
  theme_bw() +
  geom_bar(stat = "identity", fill = lineColour) +
  geom_text(
    aes(label = paste("n = ", numResponses, sep = "")),
    stat = "identity", vjust = 2, size = 1.5, colour = "white"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  xlab("") +
  ylab("Median Usefulness\n") +
  theme(
    axis.title = element_text(size = rel(relSize1)),
    axis.text = element_text(size = rel(relSize1))
  )
print(plot12)
dev.off()

# Distribution of the seven usefulness ratings per language.
# The melted table also holds "median" and "numResponses" rows after the
# seven rating variables. The original selected rows 1:77, which only lines
# up with the ratings when exactly 11 languages have answers; selecting the
# rating variables by name gives the same rows in that case and stays correct
# for any other number of languages.
pdf(file = paste0(plot_dir, "usefulnessLanguages.pdf"))
plot13 <- ggplot(
  data = Use.Languages.Melted[
    Use.Languages.Melted$variable %in% names(Use.Languages)[2:8],
  ],
  aes(x = language, y = value)
) +
  theme_bw() +
  geom_bar(stat = "identity", position = "dodge", aes(fill = variable)) +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.5),
    legend.title = element_blank(),
    legend.key = element_blank(),
    legend.text = element_text(size = rel(relSize1))
  ) +
  xlab("") +
  ylab("") +
  scale_fill_brewer(
    labels = c("Extremely Useful", "Moderately Useful", "Slightly Useful",
               "Neither Useful nor Useless", "Slightly Useless", "Moderately Useless",
               "Extremely Useless"),
    palette = "RdBu"
  ) +
  coord_flip() +
  theme(
    axis.title = element_text(size = rel(relSize1)),
    axis.text = element_text(size = rel(relSize1))
  )
print(plot13)
dev.off()


# language difficulty vs utility ------------------------------------------

# Stack the (language, median) columns of both tables and tag each half with
# the measure it came from.
UseAndDifficulty.Languages <- rbind(
  Use.Languages[, c(1, 9)],
  Difficulty.Languages[, c(1, 9)]
)
UseAndDifficulty.Languages$variable <- c(
  rep("Usefulness", nrow(Use.Languages)),
  rep("Difficulty", nrow(Difficulty.Languages))
)

# Dodged bars of the two medians per language; rows without a median are
# dropped.
pdf(file = paste0(plot_dir, "UseAndDifficultyCompareLanguages.pdf"))
plot14 <- ggplot(
  data = UseAndDifficulty.Languages[!is.na(UseAndDifficulty.Languages$median), ],
  aes(x = language, y = median)
) +
  theme_bw() +
  geom_bar(stat = "identity", position = "dodge", aes(fill = variable), width = 0.6) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.title = element_blank(),
    legend.position = "top",
    legend.key.size = unit(0.3, "cm"),
    legend.key = element_blank(),
    legend.text = element_text(size = rel(relSize1)),
    axis.title.x = element_blank()
  ) +
  scale_fill_brewer(palette = paletteQual) +
  ylab("Median") +
  theme(
    axis.title = element_text(size = rel(relSize1)),
    axis.text = element_text(size = rel(relSize1)),
    plot.margin = grid::unit(c(0, 0, 0, 0), "mm")
  )
print(plot14)
dev.off()

# encourage tools ---------------------------------------------------------

# Fix the display order of the EncourageTools answers.
anonData$EncourageTools <- factor(
  anonData$EncourageTools,
  levels = c("Yes", "No", "Not applicable", NA)
)

# Count of each non-missing answer, annotated with the number of non-missing
# answers.
# NOTE(review): the $panel$ranges path depends on the internal layout of
# ggplot_build() output -- confirm it exists in the installed ggplot2 version.
pdf(file = paste0(plot_dir, "encourageTools.pdf"))
plot15 <- ggplot(
  data = NULL,
  aes(x = factor(anonData$EncourageTools[!is.na(anonData$EncourageTools)]))
) +
  theme_bw() +
  geom_bar(stat = "count", fill = lineColour, width = 0.4) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  xlab("") +
  ylab("") +
  theme(
    axis.title = element_text(size = rel(relSize1)),
    axis.text = element_text(size = rel(relSize1))
  )
plot15_xlim <- ggplot_build(plot15)$panel$ranges[[1]]$x.range[2]
plot15_ylim <- ggplot_build(plot15)$panel$ranges[[1]]$y.range[2]
plot15 <- plot15 +
  annotate(
    "text",
    x = 0.9 * plot15_xlim,
    y = 0.9 * plot15_ylim,
    label = paste0("n = ", sum(!is.na(anonData$EncourageTools), na.rm = TRUE)),
    size = rel(relSize2)
  )
print(plot15)
dev.off()

# number of tools ---------------------------------------------------------

# Add NumberTools: the row sum over the tool columns (487:511), NAs ignored.
anonData <- transform(
  anonData,
  NumberTools = rowSums(anonData[, 487:511], na.rm = TRUE)
)

meanNumTools <- mean(anonData$NumberTools, na.rm = TRUE)
medianNumTools <- median(anonData$NumberTools, na.rm = TRUE)

# Number of rows with at least one tool.
numRespondentsTools <- sum(anonData$NumberTools > 0, na.rm = TRUE)

# Bar chart of the positive NumberTools values, annotated with
# "n = <numRespondentsTools>".
# NOTE(review): the $panel$ranges path depends on the internal layout of
# ggplot_build() output -- confirm it exists in the installed ggplot2 version.
pdf(file = paste0(plot_dir, "numOfToolsDist.pdf"))
plot16 <- ggplot(
  data = NULL,
  aes(x = anonData$NumberTools[anonData$NumberTools > 0])
) +
  theme_bw() +
  geom_bar(stat = "count", fill = lineColour) +
  xlab("\nNumber of Tools") +
  ylab("Number of Courses\n")
plot16_xlim <- ggplot_build(plot16)$panel$ranges[[1]]$x.range[2]
plot16_ylim <- ggplot_build(plot16)$panel$ranges[[1]]$y.range[2]
plot16 <- plot16 +
  annotate(
    "text",
    x = 0.9 * plot16_xlim,
    y = 0.9 * plot16_ylim,
    label = paste0("n = ", numRespondentsTools),
    size = rel(relSize2)
  ) +
  theme(
    axis.title = element_text(size = rel(relSize1)),
    axis.text = element_text(size = rel(relSize1))
  )
print(plot16)
dev.off()


# tool popularity ---------------------------------------------------------

# Column totals of the tool columns and their grand total.
countsPerTool <- colSums(anonData[, 487:511], na.rm = TRUE)
numAllTools <- sum(countsPerTool)

# Student-weighted totals per tool, using only rows with NumberStudents
# below 1000.
numStudentsPerTool <- colSums(
  anonData$NumberStudents[anonData$NumberStudents < 1000] *
    anonData[anonData$NumberStudents < 1000, 487:511],
  na.rm = TRUE
)

# Students in rows (same < 1000 filter) that report at least one language
# but no tool.
numStudentsNoTool <- sum(
  anonData$NumberStudents[anonData$NumberStudents < 1000] *
    (anonData$NumberLanguages[anonData$NumberStudents < 1000] > 0) *
    (anonData$NumberTools[anonData$NumberStudents < 1000] == 0),
  na.rm = TRUE
)

# NOTE(review): countsPerTool is a plain vector, so this call leaves it
# unchanged; names(countsPerTool) was presumably intended. The data frame
# below is built with row.names = NULL, so the names do not reach it anyway.
colnames(countsPerTool) <- NULL
# Both percentages use a denominator that also counts the "no tool" cases.
toolData <- data.frame(
  tool = tools,
  numCourses = countsPerTool,
  percentByCourse = countsPerTool / (numAllTools + (numRespondentsLang - numRespondentsTools)) * 100,
  numStudents = numStudentsPerTool,
  percentByStudents = numStudentsPerTool / (sum(numStudentsPerTool) + numStudentsNoTool) * 100,
  row.names = NULL
)

# Average of the two percentages; rows and factor levels are then put in
# decreasing order of that average.
toolData$averagePopularity <- rowMeans(
  cbind(toolData$percentByCourse, toolData$percentByStudents)
)
toolData <- toolData[order(toolData$averagePopularity, decreasing = TRUE), ]
toolData$tool <- reorder(
  toolData$tool,
  order(toolData$averagePopularity, decreasing = TRUE)
)

toolDataMelted = melt(toolData[toolData$averagePopularity>0,], id.vars=c("tool", "numCourses", "numStudents", "averagePopularity"))
groupedToolScores = ddply(toolDataMelted, c("tool", "variable"), summarise,value=value)

levels(groupedToolScores$tool) = c(levels(groupedToolScores$tool),"None")
groupedToolScores[(nrow(groupedToolScores)+1):(nrow(groupedToolScores)+2),1] = "None"
groupedToolScores[(nrow(groupedToolScores)-1):(nrow(groupedToolScores)),2] = c("percentByCourse","percentByStudents")
groupedToolScores[(nrow(groupedToolScores)-1):(nrow(groupedToolScores)),3] = c((numRespondentsLang-numRespondentsTools)/(numAllTools + (numRespondentsLang-numRespondentsTools))*100,
                                                                               numStudentsNoTool/(sum(numStudentsPerTool)+numStudentsNoTool)*100)

groupedToolScores$tool = factor(as.character(groupedToolScores$tool), levels = groupedToolScores$tool[seq(1,25,2)])
groupedToolScores$tool = factor(groupedToolScores$tool,levels = levels(groupedToolScores$tool)[order(groupedToolScores$value[groupedToolScores$variable == "percentByCourse"],decreasing = T)])


# plot popularity of tools ------------------------------------------------

# Dodged bars per tool: one bar for each percentage type in groupedToolScores,
# with x-axis labels rotated and the legend placed inside the panel.
pdf(file = paste0(plot_dir, "toolPercentCompare.pdf"))
plot17 = ggplot(data = groupedToolScores, aes(x = tool, y = value)) +
  theme_bw() +
  geom_bar(aes(fill = variable), stat = "identity", position = "dodge") +
  scale_fill_brewer(palette = paletteQual, labels = c("Tool/IDE instances", "Students")) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.title = element_blank(),
    legend.position = c(0.78, 0.84),
    legend.key = element_blank(),
    legend.text = element_text(size = rel(relSize1)),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    legend.key.size = unit(0.3, "cm")
  ) +
  theme(
    axis.title = element_text(size = rel(relSize1)),
    axis.text = element_text(size = rel(relSize1)),
    plot.margin = grid::unit(c(0, 0, 1, 2), "mm")
  )
print(plot17)
dev.off()

# reasons for tool --------------------------------------------------------

# Columns 563:962 hold, for each of the 25 tools in turn, a run of 16 "reason"
# columns. A reason counts as given when its cell is not NA.
reasonGivenAllTools = !is.na(anonData[, 563:(562 + 25 * 16)])

# Regroup into one row per (response, tool) pair with 16 reason columns.
# Transposing first makes the values run response by response, so filling by
# row yields the pairs ordered by response and then by tool.
reasonGivenAllTools = matrix(t(reasonGivenAllTools), ncol = 16, byrow = TRUE,
                             dimnames = list(NULL, paste0("R", 1:16)))

# Keep only the pairs for which at least one reason was given.
reasonGivenAllTools = reasonGivenAllTools[rowSums(reasonGivenAllTools) > 0, , drop = FALSE]
if (nrow(reasonGivenAllTools) == 0){
  # No reasons at all: keep a single all-NA row so the percentage denominator
  # below is 1 rather than 0.
  reasonGivenAllTools = matrix(NA, nrow = 1, ncol = 16,
                               dimnames = list(NULL, paste0("R", 1:16)))
}
reasonsToolChosenByCourse = as.data.frame(reasonGivenAllTools)

# Total per reason, the ascending ordering of those totals, and the percentage
# of (response, tool) pairs giving each reason.
reasonsToolPerCourseTotal = colSums(reasonsToolChosenByCourse, na.rm = TRUE)
orderTotReasonsTool = order(reasonsToolPerCourseTotal, decreasing = FALSE)
reasonsByCourseTool = data.frame(reason = factor(ReasonsTool, levels = ReasonsTool[orderTotReasonsTool]),
                                 percentage = round(reasonsToolPerCourseTotal / nrow(reasonsToolChosenByCourse) * 100, digits = 2))


# reasons for eclipse -----------------------------------------------------

# The 16 reason columns of tool number 7.
# NOTE(review): tool 7 is taken to be Eclipse per the section title — confirm
# against the `tools` vector.
eclipseToolIndex = 7
eclipseReasonGiven = !is.na(anonData[, (563 + (eclipseToolIndex - 1) * 16):(562 + eclipseToolIndex * 16)])
dimnames(eclipseReasonGiven) = list(NULL, paste0("R", 1:16))

# Keep only the responses that gave at least one reason for this tool.
eclipseReasonGiven = eclipseReasonGiven[rowSums(eclipseReasonGiven) > 0, , drop = FALSE]
if (nrow(eclipseReasonGiven) == 0){
  # No reasons at all: keep a single all-NA row so the percentage denominator
  # below is 1 rather than 0.
  eclipseReasonGiven = matrix(NA, nrow = 1, ncol = 16,
                              dimnames = list(NULL, paste0("R", 1:16)))
}
reasonsToolChosenByCourseEclipse = as.data.frame(eclipseReasonGiven)

# Percentage of those responses giving each reason; the reason factor uses the
# level order computed for all tools (orderTotReasonsTool).
reasonsToolPerCourseTotalEclipse = colSums(reasonsToolChosenByCourseEclipse, na.rm = TRUE)
reasonsByCourseToolEclipse = data.frame(reason = factor(ReasonsTool, levels = ReasonsTool[orderTotReasonsTool]),
                                        percentage = round(reasonsToolPerCourseTotalEclipse / nrow(reasonsToolChosenByCourseEclipse) * 100, digits = 2))


# reasons for BlueJ -------------------------------------------------------

# The 16 reason columns of tool number 5.
# NOTE(review): tool 5 is taken to be BlueJ per the section title — confirm
# against the `tools` vector.
blueJToolIndex = 5
blueJReasonGiven = !is.na(anonData[, (563 + (blueJToolIndex - 1) * 16):(562 + blueJToolIndex * 16)])
dimnames(blueJReasonGiven) = list(NULL, paste0("R", 1:16))

# Keep only the responses that gave at least one reason for this tool.
blueJReasonGiven = blueJReasonGiven[rowSums(blueJReasonGiven) > 0, , drop = FALSE]
if (nrow(blueJReasonGiven) == 0){
  # No reasons at all: keep a single all-NA row so the percentage denominator
  # below is 1 rather than 0.
  blueJReasonGiven = matrix(NA, nrow = 1, ncol = 16,
                            dimnames = list(NULL, paste0("R", 1:16)))
}
reasonsToolChosenByCourseBlueJ = as.data.frame(blueJReasonGiven)

# Percentage of those responses giving each reason; the reason factor uses the
# level order computed for all tools (orderTotReasonsTool).
reasonsToolPerCourseTotalBlueJ = colSums(reasonsToolChosenByCourseBlueJ, na.rm = TRUE)
reasonsByCourseToolBlueJ = data.frame(reason = factor(ReasonsTool, levels = ReasonsTool[orderTotReasonsTool]),
                                      percentage = round(reasonsToolPerCourseTotalBlueJ / nrow(reasonsToolChosenByCourseBlueJ) * 100, digits = 2))

# compare reasons for: all, eclipse, bluej --------------------------------

# Stack the three 16-row percentage tables and tag each row with its source.
reasonsByCourseCompareTool = rbind(
  reasonsByCourseTool,
  reasonsByCourseToolEclipse,
  reasonsByCourseToolBlueJ
)
reasonsByCourseCompareTool$tool = rep(c("All", "Eclipse", "BlueJ"), each = 16)

# Horizontal dodged bars: one group per reason, one bar per source.
pdf(file = paste0(plot_dir, "reasonsByCourseCompareTool.pdf"), width = 4.5, height = 3)
plot18 = ggplot(data = reasonsByCourseCompareTool, aes(x = reason, y = percentage)) +
  theme_bw() +
  geom_bar(aes(fill = tool), stat = "identity", position = "dodge", width = 0.6) +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.5),
    legend.title = element_blank(),
    legend.position = c(0.8, 0.1),
    legend.key = element_blank(),
    legend.key.size = unit(0.3, "cm"),
    legend.text = element_text(size = rel(relSize1)),
    axis.title.y = element_blank(),
    axis.title.x = element_blank()
  ) +
  scale_fill_brewer(palette = paletteQual) + #labs(y = "Percentage") +
  coord_flip() +
  theme(
    axis.title = element_text(size = rel(relSize1)),
    axis.text = element_text(size = rel(relSize1)),
    plot.margin = grid::unit(c(0, 0, 1, 5), "mm")
  )
print(plot18)
dev.off()


# timing of tools ---------------------------------------------------------

# Recode the two timing answers in columns 513:537 to 1 (initial part) and
# 2 (throughout the course).
timingCodes = c("for an initial part of the first programming course?" = 1,
                "throughout the first programming course?" = 2)
for (i in 513:537){
  for (timingLabel in names(timingCodes)){
    anonData[which(anonData[,i] == timingLabel),i] = timingCodes[[timingLabel]]
  }
}

# Per-tool counts of each answer; tools with no answers are dropped before melting.
Timing.Tools = data.frame(
  tool = tools,
  initial = colSums(anonData[,513:537] == 1, na.rm = TRUE),
  whole = colSums(anonData[,513:537] == 2, na.rm = TRUE),
  row.names = NULL
)
Timing.Tools.Melted = melt(
  Timing.Tools[rowSums(Timing.Tools[,c(2:3)]) > 0,],
  id.vars = c("tool")
)

pdf(file = paste0(plot_dir, "timingTool.pdf"))
plot19 = ggplot(data = Timing.Tools.Melted, aes(x = tool, y = value)) +
  theme_bw() +
  geom_bar(aes(fill = variable), stat = "identity", position = "dodge") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.title = element_blank(),
    legend.position = c(0.8, 0.8),
    legend.key = element_blank(),
    legend.text = element_text(size = rel(relSize1))
  ) +
  scale_fill_brewer(palette = "Set1", labels = c("Initial", "Whole")) +
  xlab("") +
  ylab("") +
  theme(
    axis.title = element_text(size = rel(relSize1)),
    axis.text = element_text(size = rel(relSize1))
  )
print(plot19)
dev.off()


# any other course - tools ------------------------------------------------

# Recode the Yes/No answers in columns 538:562 to 1 (Yes) and 2 (No).
otherCourseCodes = c("Yes" = 1, "No" = 2)
for (i in 538:562){
  for (otherCourseLabel in names(otherCourseCodes)){
    anonData[which(anonData[,i] == otherCourseLabel),i] = otherCourseCodes[[otherCourseLabel]]
  }
}

# Per-tool counts of each answer; tools with no answers are dropped before melting.
AnyOtherCourse.Tools = data.frame(
  tool = tools,
  yes = colSums(anonData[,538:562] == 1, na.rm = TRUE),
  no = colSums(anonData[,538:562] == 2, na.rm = TRUE),
  row.names = NULL
)
AnyOtherCourse.Tools.Melted = melt(
  AnyOtherCourse.Tools[rowSums(AnyOtherCourse.Tools[,c(2:3)]) > 0,],
  id.vars = c("tool")
)

pdf(file = paste0(plot_dir, "anyOtherCourseTool.pdf"))
plot20 = ggplot(data = AnyOtherCourse.Tools.Melted, aes(x = tool, y = value)) +
  theme_bw() +
  geom_bar(aes(fill = variable), stat = "identity", position = "dodge") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.title = element_blank(),
    legend.position = c(0.8, 0.8),
    legend.key = element_blank(),
    legend.text = element_text(size = rel(relSize1))
  ) +
  scale_fill_brewer(palette = "Set1", labels = c("Yes", "No")) +
  xlab("") +
  ylab("") +
  theme(
    axis.title = element_text(size = rel(relSize1)),
    axis.text = element_text(size = rel(relSize1))
  )
print(plot20)
dev.off()


# timing and other course - tools -----------------------------------------

# Per-tool counts of the two timing codes (columns 513:537) and of code 1 in
# the other-course columns (538:562); tools with all-zero counts are dropped.
TimingOtherCourseTools = data.frame(
  tool = tools,
  initial = colSums(anonData[,513:537] == 1, na.rm = TRUE),
  whole = colSums(anonData[,513:537] == 2, na.rm = TRUE),
  yes = colSums(anonData[,538:562] == 1, na.rm = TRUE),
  row.names = NULL
)
TimingOtherCourse.Tools.Melted = melt(
  TimingOtherCourseTools[rowSums(TimingOtherCourseTools[,c(2:4)]) > 0,],
  id.vars = c("tool")
)

pdf(file = paste0(plot_dir, "timingOtherCourseTool.pdf"))
plot21 = ggplot(data = TimingOtherCourse.Tools.Melted, aes(x = tool, y = value)) +
  theme_bw() +
  geom_bar(aes(fill = variable), stat = "identity", position = "dodge", width = 0.6) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.title = element_blank(),
    legend.position = c(0.82,0.80),
    legend.key.size = unit(0.3, "cm"),
    legend.key = element_blank(),
    legend.text = element_text(size = rel(relSize1)),
    axis.title.x = element_blank()
  ) +
  scale_fill_brewer(palette = "Set1", labels = c("Initial", "Whole", "Other Course")) +
  ylab("Number of Courses") +
  theme(
    axis.title = element_text(size = rel(relSize1)),
    axis.text = element_text(size = rel(relSize1)),
    plot.margin = grid::unit(c(1,0,0,0), "mm")
  )
print(plot21)
dev.off()

# tool difficulty - you ---------------------------------------------------

# Recode the seven difficulty labels in columns 988:1012 to the scores 1-7,
# then make each column numeric.
difficultyScores = c("Extremely Easy" = 1, "Moderately Easy" = 2, "Slightly Easy" = 3,
                     "Neither Easy nor Difficult" = 4, "Slightly Difficult" = 5,
                     "Moderately Difficult" = 6, "Extremely Difficult" = 7)
for (i in 988:1012){
  for (difficultyLabel in names(difficultyScores)){
    anonData[which(anonData[,i] == difficultyLabel),i] = difficultyScores[[difficultyLabel]]
  }
  anonData[,i] = as.numeric(anonData[,i])
}

# Per-tool count of each score. Tool levels are reversed so that the first
# tool ends up at the top once the bar chart is flipped.
DifficultyYou.Tools = data.frame(tool = factor(tools, levels = rev(tools)),
                                 ExEasy = colSums(anonData[,988:1012] == 1, na.rm = TRUE),
                                 ModEasy = colSums(anonData[,988:1012] == 2, na.rm = TRUE),
                                 SliEasy = colSums(anonData[,988:1012] == 3, na.rm = TRUE),
                                 NotEasyNorDiff = colSums(anonData[,988:1012] == 4, na.rm = TRUE),
                                 SliDiff = colSums(anonData[,988:1012] == 5, na.rm = TRUE),
                                 ModDiff = colSums(anonData[,988:1012] == 6, na.rm = TRUE),
                                 ExDiff = colSums(anonData[,988:1012] == 7, na.rm = TRUE),
                                 row.names = NULL)

# Names of the seven score-count columns (columns 2:8).
difficultyCountCols = names(DifficultyYou.Tools)[2:8]

# Median score and number of responses per tool. The median is only filled in
# when a tool has more than one response; otherwise it stays NA.
DifficultyYou.Tools$median = rep(NA, nrow(DifficultyYou.Tools))
DifficultyYou.Tools$numResponses = rep(NA, nrow(DifficultyYou.Tools))
for (i in seq_len(nrow(DifficultyYou.Tools))){
  # Expand the counts back into the individual scores.
  allScores = rep(1:7, times = unlist(DifficultyYou.Tools[i, 2:8]))
  DifficultyYou.Tools$numResponses[i] = length(allScores)
  if (length(allScores) > 1){
    DifficultyYou.Tools$median[i] = median(allScores)
  }
}

DifficultyYou.Tools.Melted = melt(DifficultyYou.Tools[rowSums(DifficultyYou.Tools[,c(2:8)]) > 0,], id.vars = c("tool"))

# Plot only the seven score counts. The melted table also carries `median` and
# `numResponses` rows, so select by variable name rather than by a fixed row
# count, which would only be right for one particular number of tools.
pdf(file = paste0(plot_dir, "DifficultyYouTools.pdf"))
plot22 = ggplot(data = DifficultyYou.Tools.Melted[DifficultyYou.Tools.Melted$variable %in% difficultyCountCols,],
                aes(x = tool, y = value)) + theme_bw() +
  geom_bar(stat = "identity", position = "dodge", aes(fill = variable)) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5), legend.title = element_blank(),
        legend.key = element_blank(), legend.text = element_text(size = rel(relSize1))) +
  xlab("") + ylab("") +
  scale_fill_brewer(labels = c("Extremely Easy", "Moderately Easy", "Slightly Easy",
                               "Neither Easy nor Difficult", "Slightly Difficult", "Moderately Difficult",
                               "Extremely Difficult"), palette = "RdBu") + coord_flip() +
  theme(axis.title = element_text(size = rel(relSize1)), axis.text = element_text(size = rel(relSize1)))
print(plot22)
dev.off()

# Median difficulty per tool, ordered by median, with the response count
# printed inside each bar.
pdf(file = paste0(plot_dir, "YouToolsMedianDiff.pdf"))
plot23 = ggplot(data = DifficultyYou.Tools[!is.na(DifficultyYou.Tools$median),],
                aes(x = factor(tool, levels = tool[order(median)]), y = median)) + theme_bw() +
  geom_bar(stat = "identity", fill = lineColour) +
  geom_text(aes(label = paste("n = ", numResponses, sep = "")), stat = "identity", vjust = 2, size = 1.5, colour = "white") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  xlab("") + ylab("Median Difficulty\n") +
  theme(axis.title = element_text(size = rel(relSize1)), axis.text = element_text(size = rel(relSize1)))
print(plot23)
dev.off()


# tool difficulty - students ----------------------------------------------

# Recode the seven difficulty labels in columns 1013:1037 to the scores 1-7,
# then make each column numeric.
difficultyScores = c("Extremely Easy" = 1, "Moderately Easy" = 2, "Slightly Easy" = 3,
                     "Neither Easy nor Difficult" = 4, "Slightly Difficult" = 5,
                     "Moderately Difficult" = 6, "Extremely Difficult" = 7)
for (i in 1013:1037){
  for (difficultyLabel in names(difficultyScores)){
    anonData[which(anonData[,i] == difficultyLabel),i] = difficultyScores[[difficultyLabel]]
  }
  anonData[,i] = as.numeric(anonData[,i])
}

# Per-tool count of each score. Tool levels are reversed so that the first
# tool ends up at the top once the bar chart is flipped.
DifficultyStudents.Tools = data.frame(tool = factor(tools, levels = rev(tools)),
                                      ExEasy = colSums(anonData[,1013:1037] == 1, na.rm = TRUE),
                                      ModEasy = colSums(anonData[,1013:1037] == 2, na.rm = TRUE),
                                      SliEasy = colSums(anonData[,1013:1037] == 3, na.rm = TRUE),
                                      NotEasyNorDiff = colSums(anonData[,1013:1037] == 4, na.rm = TRUE),
                                      SliDiff = colSums(anonData[,1013:1037] == 5, na.rm = TRUE),
                                      ModDiff = colSums(anonData[,1013:1037] == 6, na.rm = TRUE),
                                      ExDiff = colSums(anonData[,1013:1037] == 7, na.rm = TRUE),
                                      row.names = NULL)

# Names of the seven score-count columns (columns 2:8).
difficultyCountCols = names(DifficultyStudents.Tools)[2:8]

# Median score and number of responses per tool. The median is only filled in
# when a tool has more than one response; otherwise it stays NA.
DifficultyStudents.Tools$median = rep(NA, nrow(DifficultyStudents.Tools))
DifficultyStudents.Tools$numResponses = rep(NA, nrow(DifficultyStudents.Tools))
for (i in seq_len(nrow(DifficultyStudents.Tools))){
  # Expand the counts back into the individual scores.
  allScores = rep(1:7, times = unlist(DifficultyStudents.Tools[i, 2:8]))
  DifficultyStudents.Tools$numResponses[i] = length(allScores)
  if (length(allScores) > 1){
    DifficultyStudents.Tools$median[i] = median(allScores)
  }
}

DifficultyStudents.Tools.Melted = melt(DifficultyStudents.Tools[rowSums(DifficultyStudents.Tools[,c(2:8)]) > 0,], id.vars = c("tool"))

# Plot only the seven score counts. The melted table also carries `median` and
# `numResponses` rows, so select by variable name rather than by a fixed row
# count, which would only be right for one particular number of tools.
pdf(file = paste0(plot_dir, "DifficultyStudentsTools.pdf"))
plot24 = ggplot(data = DifficultyStudents.Tools.Melted[DifficultyStudents.Tools.Melted$variable %in% difficultyCountCols,],
                aes(x = tool, y = value)) + theme_bw() +
  geom_bar(stat = "identity", position = "dodge", aes(fill = variable)) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5), legend.title = element_blank(),
        legend.key = element_blank(), legend.text = element_text(size = rel(relSize1))) +
  xlab("") + ylab("") +
  scale_fill_brewer(labels = c("Extremely Easy", "Moderately Easy", "Slightly Easy",
                               "Neither Easy nor Difficult", "Slightly Difficult", "Moderately Difficult",
                               "Extremely Difficult"), palette = "RdBu") + coord_flip() +
  theme(axis.title = element_text(size = rel(relSize1)), axis.text = element_text(size = rel(relSize1)))
print(plot24)
dev.off()

# Median difficulty per tool, ordered by median, with the response count
# printed inside each bar.
pdf(file = paste0(plot_dir, "StudentsToolsMedianDiff.pdf"))
plot25 = ggplot(data = DifficultyStudents.Tools[!is.na(DifficultyStudents.Tools$median),],
                aes(x = factor(tool, levels = tool[order(median)]), y = median)) + theme_bw() +
  geom_bar(stat = "identity", fill = lineColour) +
  geom_text(aes(label = paste("n = ", numResponses, sep = "")), stat = "identity", vjust = 2, size = 1.5, colour = "white") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  xlab("") + ylab("Median Difficulty\n") +
  theme(axis.title = element_text(size = rel(relSize1)), axis.text = element_text(size = rel(relSize1)))
print(plot25)
dev.off()


# compare difficulty - you vs students ------------------------------------

# Stack the tool / median / numResponses columns (1, 9, 10) of the student and
# instructor tables, students first.
DifficultyYouStudentsCompareTools = rbind(DifficultyStudents.Tools[,c(1,9,10)], DifficultyYou.Tools[,c(1,9,10)])

# Label each row with its source. Each label count comes from the table that
# supplied those rows.
DifficultyYouStudentsCompareTools$Variable = c(rep("Students", nrow(DifficultyStudents.Tools)),
                                               rep("Instructor", nrow(DifficultyYou.Tools)))

# Reverse the tool level order for the x axis.
DifficultyYouStudentsCompareTools$tool = factor(DifficultyYouStudentsCompareTools$tool,
                                                levels = rev(levels(DifficultyYouStudentsCompareTools$tool)))

# Dodged bars of median difficulty per tool; rows without a median are omitted.
pdf(file = paste0(plot_dir, "DifficultyYouStudentsCompareTools.pdf"))
plot26 = ggplot(data = DifficultyYouStudentsCompareTools[!is.na(DifficultyYouStudentsCompareTools$median),], aes(x = tool, y = median)) + theme_bw() +
  geom_bar(stat = "identity", position = "dodge", aes(fill = Variable), width = 0.6) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1), legend.title = element_blank(),
        legend.position = c(0.86,0.84),
        legend.key.size = unit(0.3, "cm"),
        legend.key = element_blank(), legend.text = element_text(size = rel(relSize1)), axis.title.x = element_blank()) +
  scale_fill_brewer(palette = paletteQual) +
  ylab("Median Difficulty") +
  theme(axis.title = element_text(size = rel(relSize1)), axis.text = element_text(size = rel(relSize1)),
        plot.margin = grid::unit(c(0,1,0,0), "mm"))
print(plot26)
dev.off()

# years teaching ----------------------------------------------------------

# Order the experience bands from shortest to longest.
anonData$YearsTeaching = factor(
  anonData$YearsTeaching,
  levels = c("under 2 years","2 – 5 years","over 5 years – 10 years", "over 10 years – 20 years", "over 20 years – 30 years","over 30 years")
)

# Count of responses per band (missing answers excluded), with short axis labels.
pdf(file = paste0(plot_dir, "YearsTeaching.pdf"))
plot27 = ggplot(data = anonData[!is.na(anonData$YearsTeaching),], aes(x = YearsTeaching)) +
  theme_bw() +
  geom_bar(stat = "count", fill = lineColour) +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.5),
    axis.title = element_text(size = rel(relSize1)),
    axis.text = element_text(size = rel(relSize1))
  ) +
  xlab("\nYears Teaching") +
  ylab("") +
  scale_x_discrete(labels = c("$<$ 2","2 - 5","5 - 10", "10 - 20",
                              "20 - 30", "$>$ 30"))
print(plot27)
dev.off()


# external delivery -------------------------------------------------------

# Fix the answer order as Yes, then No.
anonData$ExternalDelivery = factor(anonData$ExternalDelivery, levels = c("Yes", "No"))

# Count of responses per answer, missing answers excluded.
pdf(file = paste0(plot_dir, "ExternalDelivery.pdf"))
plot28 = ggplot(data = anonData[!is.na(anonData$ExternalDelivery),], aes(x = ExternalDelivery)) +
  theme_bw() +
  geom_bar(stat = "count", fill = lineColour, width = 0.4) +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.5),
    axis.title = element_text(size = rel(relSize1)),
    axis.text = element_text(size = rel(relSize1))
  ) +
  xlab("\nExternal Delivery") +
  ylab("")
print(plot28)
dev.off()


# unauthorised assistance -------------------------------------------------

# Fix the display order of the answer categories for both questions.
anonData$PossibleUnauthorisedAssistance = factor(
  anonData$PossibleUnauthorisedAssistance,
  levels = c("Yes", "No", "Not applicable")
)
anonData$HowConcerned = factor(
  anonData$HowConcerned,
  levels = c("Not concerned", "Somewhat concerned", "Very concerned")
)

# Count of responses per answer to the first question, missing answers excluded.
pdf(file = paste0(plot_dir, "UnauthorisedAssistance.pdf"))
plot29 = ggplot(data = anonData[!is.na(anonData$PossibleUnauthorisedAssistance),], aes(x = PossibleUnauthorisedAssistance)) +
  theme_bw() +
  geom_bar(stat = "count", fill = lineColour, width = 0.4) +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.5),
    axis.title = element_text(size = rel(relSize1)),
    axis.text = element_text(size = rel(relSize1))
  ) +
  xlab("\nConsider Possible Unauthorised Assistance") +
  ylab("")
print(plot29)
dev.off()

# Count of responses per level of concern, with two-line axis labels.
pdf(file = paste0(plot_dir, "HowConcerned.pdf"))
plot30 = ggplot(data = anonData[!is.na(anonData$HowConcerned),], aes(x = HowConcerned)) +
  theme_bw() +
  geom_bar(stat = "count", fill = lineColour, width = 0.4) +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.5),
    axis.title = element_text(size = rel(relSize1)),
    axis.text = element_text(size = rel(relSize1))
  ) +
  xlab("") +
  ylab("Number of Instructors\n") +
  scale_x_discrete(labels = c("Not\nConcerned", "Somewhat\nConcerned", "Very\nConcerned"))
print(plot30)
dev.off()


# steps against unauthorised assistance -----------------------------------

# Number of responses selecting each step (column sums of 1044:1050), with the
# step factor ordered by increasing count.
totalSteps = data.frame(
  steps = factor(steps),
  number = colSums(anonData[,1044:1050], na.rm = TRUE),
  row.names = NULL
)
totalSteps$steps = factor(totalSteps$steps, levels = totalSteps$steps[order(totalSteps$number)])

# Horizontal bars for rows 2 to 7 only.
# NOTE(review): row 1 is left out of the plot; the reason is not visible here — confirm.
pdf(file = paste0(plot_dir, "Steps.pdf"))
plot31 = ggplot(data = totalSteps[2:7,], aes(x = steps, y = number)) +
  theme_bw() +
  geom_bar(stat = "identity", fill = lineColour, width = 0.4) +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.5),
    legend.title = element_blank(),
    legend.key = element_blank(),
    legend.text = element_text(size = rel(relSize1)),
    axis.title.y = element_blank()
  ) +
  labs(y = "Number of Instructors") +
  coord_flip() +
  theme(
    axis.title = element_text(size = rel(relSize1)),
    axis.text = element_text(size = rel(relSize1)),
    plot.margin = grid::unit(c(0,1,0,0), "mm")
  )
print(plot31)
dev.off()

# resources ---------------------------------------------------------------

# Number of responses selecting each resource (column sums of 1052:1065), with
# the resource factor ordered by increasing count.
totalResources = data.frame(
  resources = resources,
  number = colSums(anonData[,1052:1065], na.rm = TRUE),
  row.names = NULL
)
totalResources$resources = factor(
  totalResources$resources,
  levels = totalResources$resources[order(totalResources$number)]
)

# Horizontal bars, one per resource. axis.text.x is set once, with the final
# (unrotated) values.
pdf(file = paste0(plot_dir, "Resources.pdf"))
plot32 = ggplot(data = totalResources, aes(x = resources, y = number)) +
  geom_bar(stat = "identity", fill = lineColour, width = 0.4) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.5),
    legend.title = element_blank(),
    legend.key = element_blank(),
    legend.text = element_text(size = rel(relSize1)),
    axis.title.y = element_blank()
  ) +
  labs(y = "\nNumber of Instructors") +
  coord_flip() +
  theme(
    axis.title = element_text(size = rel(relSize1)),
    axis.text = element_text(size = rel(relSize1)),
    plot.margin = grid::unit(c(0,0,0,0), "mm")
  )
print(plot32)
dev.off()

# aims --------------------------------------------------------------------

# Read the coded aims and pool the non-missing codes of the three code columns.
aimsCode = read.csv(file = paste0(save_dir, "aims.csv"), stringsAsFactors = FALSE)
aimsFirst = aimsCode$Code.1[!is.na(aimsCode$Code.1)]
aimsSecond = aimsCode$Code.2[!is.na(aimsCode$Code.2)]
aimsThird = aimsCode$Code.3[!is.na(aimsCode$Code.3)]
allAims = c(aimsFirst, aimsSecond, aimsThird)

# Map the two letter codes onto 14 and 15, and discard "X" and empty entries.
allAims[allAims == "SE"] = 14
allAims[allAims == "P"] = 15
allAims[allAims %in% c("X", "")] = NA

# Replace each code 1-15 by its label from aimLabels.
for (i in seq_len(15)){
  allAims[allAims == i] = aimLabels[i]
}

# Frequency of each aim, with the aim factor ordered by increasing count.
aimsTable = melt(table(allAims[!is.na(allAims)]))
aimsTable$Var1 = factor(
  aimsTable$Var1,
  levels = aimsTable$Var1[order(aimsTable$value, decreasing = FALSE)]
)

# Horizontal bars, one per aim. axis.text.x is set once, with the final
# (unrotated) values.
pdf(file = paste0(plot_dir, "Aims.pdf"))
plot33 = ggplot(data = aimsTable, aes(x = Var1, y = value)) +
  geom_bar(stat = "identity", fill = lineColour, width = 0.4) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.5),
    legend.title = element_blank(),
    legend.key = element_blank(),
    legend.text = element_text(size = rel(relSize1)),
    axis.title.y = element_blank()
  ) +
  labs(y = "Number of Instructors") +
  coord_flip() +
  theme(
    axis.title = element_text(size = rel(relSize1)),
    axis.text = element_text(size = rel(relSize1)),
    plot.margin = grid::unit(c(0,0,0,0), "mm")
  )
print(plot33)
dev.off()