mirror of
https://github.com/kidwellj/re_connect_survey.git
synced 2024-10-31 23:42:20 +00:00
6a8fc8f590
I finished the analyses for each section, and have moved onto beginning a file for visualizing all the data. The new "visualization data" csv file has only the columns that were asked for in lines 134-158. I'll continue to work on those plots this week.
343 lines
15 KiB
Plaintext
343 lines
15 KiB
Plaintext
---
|
|
title: "Connect Project"
|
|
output: html_document
|
|
---
|
|
|
|
```{r setup, include=FALSE}
|
|
# Echo the source of every chunk in the knitted document by default.
knitr::opts_chunk$set(echo = TRUE)

# RColorBrewer supplies the palette used by the plots below.
# One-off install, if needed: install.packages("RColorBrewer")
library(RColorBrewer)

# Three-colour palette drawn from the diverging RdYlBu scheme:
# https://colorbrewer2.org/#type=diverging&scheme=RdYlBu&n=5
coul3 <- brewer.pal(n = 3, name = "RdYlBu")
|
|
|
|
```
|
|
|
|
Note that the setup chunk sets `echo = TRUE`, so the R code behind each result is printed alongside its output; add `echo = FALSE` to an individual chunk to hide its code.
|
|
|
|
### To Do List
|
|
|
|
## Upload Data
|
|
```{r Data Upload}
|
|
# Load the main Connect data set (path is relative to the working directory).
connect_data <- read.csv(file = "./data/connectDATA.csv")
|
|
```
|
|
|
|
## Summary of Data
|
|
Data summary/visualisation with subsetting:
|
|
- RH: display simple summary of data (bar/pie chart) to Q25/26, Q3
|
|
|
|
```{r Frequencies}
|
|
# Frequency tables for Q25, Q26 and Q3 ----

# Wrapping each assignment in parentheses stores the table and prints it.
(Q25_frequencies <- table(connect_data$Q25))
(Q26_freq <- table(connect_data$Q26))
(Q3_freq <- table(connect_data$Q3))

# Abandoned labelling attempt, kept for reference. as.factor() has no
# `levels` or `labels` arguments, so factor() would be needed instead.
# test3 = as.factor(connect_data$Q3, levels = c(1, 2, 3, 4, 5), labels = c("Worldviews", "Religion", "Theology", "Ethics", "Philosophy"))
|
|
```
|
|
|
|
```{r Q25 bar/pie}
|
|
# Slice labels shared by both charts.
# NOTE(review): assumes table() orders the Q25 answers as Maybe, No, Yes --
# confirm against the data.
q25_slice_labels <- c("Maybe", "No", "Yes")

# Default colours first, then the same chart using the coul3 palette.
pie(Q25_frequencies, labels = q25_slice_labels)
pie(Q25_frequencies, labels = q25_slice_labels, col = coul3)
|
|
|
|
```
|
|
|
|
```{r Q26 bar/pie}
|
|
|
|
# Q26: how often each reason was ticked ----

Q26_data <- read.csv("./data/Q26_data.csv")

# Display names for the reasons stored in columns 2 to 10 of Q26_data.
# NOTE(review): the label order is assumed to match the column order --
# confirm against the CSV header.
q26_reasons <- c(
  "Other Priorities", "Lack Subject Knowledge", "Lack Confidence",
  "Current Syllabus", "Pupil Disinterest", "Department Head",
  "Available Work Schemes", "Unavailable Resources",
  "Uncertain of Pedagogical Approach"
)

# Count the "TRUE" answers in each reason column. sum() yields 0 for a
# column without any TRUE, whereas picking the "TRUE" cell out of table()
# yields a zero-length result there and makes data.frame() fail on
# mismatched column lengths.
q26_true_counts <- vapply(
  Q26_data[, 2:10],
  function(answers) sum(answers == "TRUE", na.rm = TRUE),
  integer(1)
)

# One row per reason; columns are named on construction.
Q26_freq_data <- data.frame(
  Reasons = q26_reasons,
  Frequency = unname(q26_true_counts)
)
head(Q26_freq_data)

pie(Q26_freq_data$Frequency, labels = q26_reasons)

# Bar graph tidier
|
|
|
|
|
|
```
|
|
|
|
pie(Q26_freq)
|
|
#very messy as a pie chart - split by type? Or is it important to see crossover
|
|
|
|
Could potentially see crossover with crosstabs by type (since response is now binary variable T/F), maybe chi square; perhaps just descriptives
|
|
|
|
|
|
```{r Q3 bar/pie}
|
|
|
|
# Q3: how often each subject was ticked ----

Q3_data <- read.csv("./data/Q3.csv")

# Earlier exploration, kept for reference:
# head(Q3_data)
# table(Q3_data[, 3:7])
# pie(table(Q3_data[, 3:7]))

# Keep columns 3 to 7 only.
# NOTE(review): presumably one TRUE/FALSE column per subject -- confirm.
Q3_data2 <- Q3_data[, 3:7]
# head(Q3_data2)
# table(Q3_data2)
# table(Q3_data2[, 1])

### Want only the count of "True" (1) in each column, then a pie chart of
### those frequencies.

# Q3_data3 <- read.csv("~/Documents/Github/re_connect_survey/data/Q3 copydata.csv")
# table(Q3_data3)
# count(Q3_data3, 1)
# table(Q3_data3)[names(table(Q3_data3)) == 1]
# table(Q3_data3)

# Display names for the five retained columns, in column order.
# NOTE(review): label order is assumed to match the column order -- confirm.
q3_subjects <- c("Worldviews", "Religion", "Theology", "Ethics", "Philosophy")

# Count the "TRUE" answers per column. sum() yields 0 for a column without
# any TRUE, whereas picking the "TRUE" cell out of table() yields a
# zero-length result there and makes data.frame() fail on mismatched
# column lengths.
q3_true_counts <- vapply(
  Q3_data2,
  function(answers) sum(answers == "TRUE", na.rm = TRUE),
  integer(1)
)

# Count for the first retained column.
q3_true_counts[1]

# One row per subject; columns are named on construction.
test2 <- data.frame(
  Subject = q3_subjects,
  Frequency = unname(q3_true_counts)
)
head(test2)

pie(test2$Frequency, labels = q3_subjects)

# JK note on Q3: consider here whether to use alternative forms of
# visualisation to reflect the overlaps when respondents picked multiple
# categories in responses
|
|
|
|
```
|
|
|
|
xtabs(Frequency ~ Subject, test2)
|
|
|
|
pie(Q3_freq)
|
|
#also not optimal as pie...perhaps bar
|
|
|
|
#sum(Q3_data2)
|
|
|
|
Q3_1factor = as.factor(Q3_data2$Religion)
|
|
table(Q3_1factor)
|
|
#count(Q3_1factor, "TRUE")
|
|
|
|
#test = replace(Q3_1factor, "TRUE", 1)
|
|
#test
|
|
#Q3_1factor
|
|
|
|
- RH: display summaries of responses to key questions for Q22 (syllabus evaluation), Q23, Q24, Q25, Q26, Q27, with subsetting by:
|
|
- Q8 (school type)
|
|
- Q9 (school size)
|
|
- Q10 (school location)
|
|
- Q1 (grade level) + Q35 (teaching role) + Q5 (teaching proportion) + Q2 (tenure) + Q3 (subjects taught) + Q6/Q7 (management)
|
|
- Q12-14 (school's official religion) / Q15-16 (school's informal religion)
|
|
- Q21 (respondent personal religious background)
|
|
- Q4 (teacher's degree subject)
|
|
- Q18 (respondent gender)
|
|
- Q19 (respondent ethnic self-desc)
|
|
|
|
```{r Plots}
|
|
# Placeholder chunk for the summary plots of Q22-Q27 ----
library(ggplot2)

# Q22
# TODO: build the Q22 plot and assign it to `testplot`. The assignment is
# left commented out because a bare `testplot <-` with no right-hand side
# is an incomplete expression and stops the chunk from parsing.
# testplot <-

# Q23

# Q24

# Q25

# Q26

# Q27
|
|
```
|
|
|
|
## Correlation testing:
|
|
- RH: test for correlation between "social issue" box ticked on Q20 and responses to Q22, Q23, Q27
|
|
|
|
- Make Q20 a factor with 14 levels
|
|
- Collapse 2 Q22 columns into one mean for analyses
|
|
- Analyse 1 way anova Q20 (14 levels) by Q22; Q23[1-2]; Q27[1-7]
|
|
- 1 way within subjects?? Though not all participants ticked every box... Would it then be best to separate them out and do 14 separate analyses with bonferroni correction due to the multiple tests? - could then be 14 different t tests based on whether they ticked each one as important or not... Many analyses but that may be the most straightforward way to go. Factorial mixed ANOVA? 14 predictors, each with 2 levels (yes/no)??
|
|
- 14 predictors, within subjects, 2 levels (yes/no). DV as responses to questions. Q22 would be a factorial between subjects (only 1 option on IVs) ANOVA. Qs 23, 27 would be factorial between subjects MANOVA
|
|
|
|
|
|
```{r Analyses 1 - As Factor}
|
|
# Q20 social issues: load and convert each issue column to a factor ----

social_issues_data <- read.csv("./data/Q20_data.csv")
head(social_issues_data)

# All 14 issue columns become factors with 2 levels: 1 = Yes, 2 = No.
issue_columns <- c(
  "brexit", "economy", "immigration", "crime", "health", "education",
  "housing", "welfare", "defence", "environment", "tax", "pensions",
  "family.life", "transport"
)

# Earlier attempts, kept for reference. The first passed a two-column slice
# to a single factor() call (the author notes it turned both columns into
# NA); the second handed lapply() the result of factor() rather than a
# function to apply.
# social_issues_data[ ,4:5] <- factor(social_issues_data[ ,4:5], levels = c(1, 2), labels = c("Yes", "No"))
# social_issues_data[ ,4:5] <- lapply(social_issues_data[ ,4:5], factor(social_issues_data[ ,4:5], levels = c(1, 2), labels = c("Yes", "No")))

# Apply the same recode to every issue column, one column at a time.
social_issues_data[issue_columns] <- lapply(
  social_issues_data[issue_columns],
  factor,
  levels = c(1, 2),
  labels = c("Yes", "No")
)

# Check that the conversion took effect.
class(social_issues_data$brexit)
|
|
|
|
```
|
|
|
|
``` {r Analyses 2 - ANOVA and MANOVA}
|
|
## Outcomes: Q22_average; Q23_1 and Q23_2; Q27_1 to Q27_7

# The 14 social-issue factors used as predictors in every model below.
# Written once so the three models cannot drift apart.
issue_terms <- ~ brexit + economy + immigration + crime + health +
  education + housing + welfare + defence + environment + tax + pensions +
  family.life + transport

q22_formula <- update(issue_terms, Q22_average ~ .)
q23_formula <- update(issue_terms, cbind(Q23_1, Q23_2) ~ .)
q27_formula <- update(
  issue_terms,
  cbind(Q27_1, Q27_2, Q27_3, Q27_4, Q27_5, Q27_6, Q27_7) ~ .
)

## Q22 ----

hist(social_issues_data$Q22_average)

# t-test on a single predictor. `paired` is deliberately not passed: the
# formula method of t.test() rejects that argument in R >= 4.4.0, and an
# unpaired test is the default.
t.test(Q22_average ~ brexit, data = social_issues_data)
# no significant difference between scores on Q22, and whether they thought
# brexit was important

Q_22test <- aov(q22_formula, data = social_issues_data)
summary(Q_22test)
# no significant difference between scores on Q22 and their opinion on
# social issues

## Q23 ----

Q_23test <- manova(q23_formula, data = social_issues_data)
summary(Q_23test)
# significant difference between scores on Q23 with economy, health, and
# environment

# Group means of Q23_1 and Q23_2 for the three predictors flagged above.
econ <- aggregate(
  cbind(Q23_1, Q23_2) ~ economy,
  data = social_issues_data, FUN = mean
)
health <- aggregate(
  cbind(Q23_1, Q23_2) ~ health,
  data = social_issues_data, FUN = mean
)
env <- aggregate(
  cbind(Q23_1, Q23_2) ~ environment,
  data = social_issues_data, FUN = mean
)

# SORT OUT MEANS FOR THIS -- interesting pattern viewed with means

## Q27 ----

Q_27test <- manova(q27_formula, data = social_issues_data)
summary(Q_27test)
# No significant difference in responses to Q27 based on what they
# considered important
|
|
```
|
|
- RH: test for correlation between responses to religion questions: Q12-14, Q15-16 and Q21 and responses to Q22, Q23, Q27, [Q24, Q25, Q30]
|
|
|
|
``` {r Analyses based on religious affiliation}
|
|
# School religious character (Q12) against Q22, Q23 and Q27 ----

religion_affiliation_data <- read.csv("./data/Religious affiliation data.csv")
head(religion_affiliation_data)

## Q12-14, with Q22, Q23, Q27
# Q12 is binary: first test whether answers differ by whether the school
# has a formal religious character (similar ANOVA/MANOVA to the social
# issue analyses).
# The labels equal the levels, so only `levels` is needed; any other value
# becomes NA.
religion_affiliation_data$Q12 <- factor(
  religion_affiliation_data$Q12,
  levels = c("No", "Yes")
)

## Q22
# `paired` is deliberately not passed: the formula method of t.test()
# rejects that argument in R >= 4.4.0, and an unpaired test is the default.
formal_affiliation_test_Q22 <- t.test(
  Q22_average ~ Q12,
  data = religion_affiliation_data
)
formal_affiliation_test_Q22

## Q23
formal_affiliation_test_Q23 <- manova(
  cbind(Q23_1, Q23_2) ~ Q12,
  data = religion_affiliation_data
)
summary(formal_affiliation_test_Q23)

## Q27
formal_affiliation_test_Q27 <- manova(
  cbind(Q27_1, Q27_2, Q27_3, Q27_4, Q27_5, Q27_6, Q27_7) ~ Q12,
  data = religion_affiliation_data
)
summary(formal_affiliation_test_Q27)
|
|
|
|
# Specific religious character (Q13) among schools answering "Yes" to Q12 ----

# Explore only the "Yes" rows and test whether answers differ by the
# specific religious character given in Q13.
# %in% is used instead of == so that rows where Q12 is NA are dropped rather
# than carried along as all-NA rows.
Q13_data <- religion_affiliation_data[
  religion_affiliation_data$Q12 %in% "Yes",
]
head(Q13_data)

# Recode the numeric Q13 codes (1 to 10) into named affiliations.
q13_labels <- c(
  "Church of England", "Roman Catholic", "Methodist", "Other Christian",
  "Jewish", "Muslim", "Sikh", "Hindu", "Multi-Faith", "None of the above"
)
Q13_data$Q13_recode <- factor(
  Q13_data$Q13_recode,
  levels = 1:10,
  labels = q13_labels
)

# Test with only the included levels (codes 1, 2, 4 and 6). The column now
# holds labels, not numeric codes, so the restriction has to be written in
# terms of labels; matching against c(1, 2, 4, 6) at this point would turn
# every value into NA. Affiliations outside this set become NA.
q13_tested_labels <- c(
  "Church of England", "Roman Catholic", "Other Christian", "Muslim"
)
Q13_data$Q13_recode <- factor(
  Q13_data$Q13_recode,
  levels = q13_tested_labels
)
# No change with this one...still nonsignificant difference

# Q22
hist(Q13_data$Q22_average)
specific_affiliation_test <- aov(Q22_average ~ Q13_recode, data = Q13_data)
# class() reports "factor"; typeof() would only report the underlying
# "integer" storage.
class(Q13_data$Q13_recode)
summary(specific_affiliation_test)

# Q23
specific_affiliation_test_Q23 <- manova(
  cbind(Q23_1, Q23_2) ~ Q13_recode,
  data = Q13_data
)
summary(specific_affiliation_test_Q23)

# Q27
specific_affiliation_test_Q27 <- manova(
  cbind(Q27_1, Q27_2, Q27_3, Q27_4, Q27_5, Q27_6, Q27_7) ~ Q13_recode,
  data = Q13_data
)
summary(specific_affiliation_test_Q27)
|
|
|
|
## Q15-16 with Q22, Q23, Q27
# Q15 is binary: first test whether answers differ by whether the school
# has an informal religious character. Q16 provides further detail and can
# be explored.
# The labels equal the levels, so only `levels` is needed; any other value
# becomes NA.
religion_affiliation_data$Q15 <- factor(
  religion_affiliation_data$Q15,
  levels = c("No", "Yes")
)

## Q22
# `paired` is deliberately not passed: the formula method of t.test()
# rejects that argument in R >= 4.4.0, and an unpaired test is the default.
informal_affiliation_test_Q22 <- t.test(
  Q22_average ~ Q15,
  data = religion_affiliation_data
)
informal_affiliation_test_Q22

## Q23
informal_affiliation_test_Q23 <- manova(
  cbind(Q23_1, Q23_2) ~ Q15,
  data = religion_affiliation_data
)
summary(informal_affiliation_test_Q23)

## Q27
informal_affiliation_test_Q27 <- manova(
  cbind(Q27_1, Q27_2, Q27_3, Q27_4, Q27_5, Q27_6, Q27_7) ~ Q15,
  data = religion_affiliation_data
)
summary(informal_affiliation_test_Q27)
|
|
|
|
```
|
|
|
|
```{r Analyses based on personal religious affiliation}
|
|
## Q21 with Q22, Q23, Q27
# Q21 is personal religious affiliation. This may be more tricky as it is a
# free answer... but the type of affiliation could be coded and tested that
# way -- would be chi-square or some sort of non-parametric analysis due to
# the small number of respondents who answered this.

personal_religious_affiliation_data <- read.csv("./data/Personal religious affiliation data.csv")
head(personal_religious_affiliation_data)

# Binary recode of Q21: 1 = "none", 2 = "answered".
personal_religious_affiliation_data$Q21_binaryrecode <- factor(
  personal_religious_affiliation_data$Q21_binaryrecode,
  levels = c(1, 2),
  labels = c("none", "answered")
)

## Q22
# `paired` is deliberately not passed: the formula method of t.test()
# rejects that argument in R >= 4.4.0, and an unpaired test is the default.
# NOTE(review): this chunk uses `Q22_avg` while the other chunks use
# `Q22_average` -- confirm the column name in this CSV.
personal_religious_affiliation_test_Q22 <- t.test(
  Q22_avg ~ Q21_binaryrecode,
  data = personal_religious_affiliation_data
)
personal_religious_affiliation_test_Q22

## Q23
personal_religious_affiliation_test_Q23 <- manova(
  cbind(Q23_1, Q23_2) ~ Q21_binaryrecode,
  data = personal_religious_affiliation_data
)
summary(personal_religious_affiliation_test_Q23)

## Q27
personal_religious_affiliation_test_Q27 <- manova(
  cbind(Q27_1, Q27_2, Q27_3, Q27_4, Q27_5, Q27_6, Q27_7) ~ Q21_binaryrecode,
  data = personal_religious_affiliation_data
)
summary(personal_religious_affiliation_test_Q27)

# Significant difference between answers to Q27 and whether participants
# indicated a personal religious affiliation -- with the small sample size
# it may be easier to visualize the differences here based on freeform
# answer

head(personal_religious_affiliation_data)

# Mean of each Q27 item within the two Q21 groups.
personal_religious_affiliation_means <- aggregate(
  cbind(Q27_1, Q27_2, Q27_3, Q27_4, Q27_5, Q27_6, Q27_7) ~ Q21_binaryrecode,
  data = personal_religious_affiliation_data,
  FUN = mean
)
personal_religious_affiliation_means

## In viewing the means, it is likely the significant difference viewed in
## the above MANOVA is within Q27_7, with those who indicated having a
## personal religious affiliation reporting lower scores (M = 2.94) than
## those who did not answer or indicated they had no religious affiliation
## (M = 3.83). This makes sense as a higher score indicates they disagree
## that they know about "how they put their beliefs about the
## climate/biodiversity crisis into practice"
# Also a slight difference in Q27_3 with those indicating having a personal
# religious affiliation reporting slightly lower scores (M = 3.24) than
# those who did not answer or indicated they had no religious affiliation
# (M = 3.76)
|
|
```
|