From 3617ba76d2a4cffa46ead79a0fb4675d72e09e81 Mon Sep 17 00:00:00 2001 From: rehughes07 <58590776+rehughes07@users.noreply.github.com> Date: Mon, 6 Dec 2021 16:10:02 +0000 Subject: [PATCH] Beginning Data Cleaning and Graphs I have put only the complete participants' data into a new .csv file in order to upload to R and begin analyses so some can be done for next week. I have made new, in some cases slightly shorter, variable title names at the top of the .csv for ease of titling and analysing. --- .DS_Store | Bin 6148 -> 6148 bytes .RData | Bin 0 -> 2595 bytes .Rapp.history | 0 .Rhistory | 0 To-do list Markdown.Rmd | 102 ++++ data/.DS_Store | Bin 6148 -> 6148 bytes data/TSR data complete.csv | 934 +++++++++++++++++++++++++++++++++++++ 7 files changed, 1036 insertions(+) create mode 100644 .RData create mode 100644 .Rapp.history create mode 100644 .Rhistory create mode 100644 To-do list Markdown.Rmd create mode 100644 data/TSR data complete.csv diff --git a/.DS_Store b/.DS_Store index 7b9d33741ed04ce8ac5299ff580d34195ae35713..0f01229d8780a98cc0f17da84f41a4d72a1d692f 100644 GIT binary patch literal 6148 zcmeHK&2AGh5FWRIZi0XaA+;AIU$})T;ZNmIDNR$P9w4YGZ~&C)}D6YxJl?52;7nKqrm?$H3pt0I%I^dcC%32P^m2`h84odWw;KTBlPAcVl%J#412M zZx&n7_3v6>Ve_Dne9Fn6i~D?DTrkQ8w%(Uu5Jy>FuYVOw%cTn!S3IxcRW4Uvb&u7k zoBR1NYx(I5?mhAI6!u0D|_W$b!|LuY;4qI<6eDIlj8@s>os}z&i%=x?5*BxZ0;VMzIi)-H+lb2zbXlA zc3Q4l{0X06Oo<4y!#KcC^(`>{2zUn1;s&CA=Xn+Lq39Oa zRwOB=h%&G)k?&JVC)z)v1hw`SXH9&Yt9Av|I(T{q@t8Z*I-vn@Kz&MQXHtBa%SG48 zKg|ID10miegRXT9I0hU8QwDf{kWj{^#loQ8I?(7V0I&qN7TElGz&X-l(_&!|BM@Oy zfhJYhD~2%X@Jk!lv{)E4=_KssL)a$^dqWZW>8LMtIEkh~*E$9q1G5Y)n{JiQ{~fI1 zbMF7MNv`J@a18uc42V*@+iu~N?Af~Ta(vcG=mC_4;|ha11&uw9wZTX68dM9$Bpbk{ U#lj#w5ceUVX>g5W;Eyu!1Mat+Jpcdz delta 109 zcmZoMXfc=|#>AjHu~68Ik%57Mg&~C@k)ebkF{L;;Cn-Na2P6mtOc06z#06pj2DtoY sK@KsN%?cb7m^ZU?@N)po0*ZWRp3ET5#&&_71daUU{V;3JZ)$ zLpn|8+B}VF8Gm;;vem=YqU=_VbVB~#Yn_h|LKnZQmwp3MbvReY;v1J5i+Nl7O~{GA z(e|t{Br%8FHe(S~YBGi(mxPp^g~zGvjGa<2Occ`yD!Q~Js|C4k@ZZT{NoI5(Jn+*& 
z=o5zyy|J?ySYS8_wf^L`!HJa~cDq75GK%ZKjckm@TJKAz|)15%nt9WQ^N;&0+<=fjv}VDYpYc&R$By2bi`0ru#TW&Y4b{_b6g-P1dG^ z@=^K?DFXuX#FG9?+9Vr$gy$IoSv^Ns?X+SYG8&Qk8D%#pXa7bQ-i8MJMi5eP{IeF< z7g0(un*g%b0p8%Z$|*zq3ERJ?w(6AC^o;sFyU!MyQ-$LNfIP-2krK74_6=XrMMsL~ zPcBUr!iWvO*uumkGjs@+-*->r2yLl>X9U3` zwkqIiyOm*q053#%`8GJ zDnFPiUlluFj4Wrx6#^%T{oUdwT-POKN520sarkAo1>2fQ>^Wfu9f%G8c{aGKV{wL* z0i%N)T=J2aAVkw!69=KoLfvHrOVo$c3i{Vh4n?Y2qh_HV^(NiZP>~T?u zaiG#a4L+5;Oi3%dNc%M-Aj%*()+|R43NIPK0_E_-cbwDEVe&UJ%xyh?_oVnHngao80N1-QQKn zNwuA^2Ta}&eWKi|Z2j@a@%Ix#e;#^v;n+#s8#{&$Y}pa(plDhDg_ejH{SjH|&cphJ zj?(Z9mt=nUN~a2+6a7i?H$)m#>)2_I=XypJWH1|br@Z3#dQ?t0_(CS4W}w=B61*t+ z570aYkz%LZYUrE3EX*&$>HOsX@$v~^n1CsJee`+VDu`$mCprYsAy9cAz-$%QMU?P7 z)KYhr@!%P45X`J8TA=)~QtUHB**x@Wv~uT;-f;XUrX&k;=s1GwwHn6* z5YW&Z-8@pxAC#Z%AA)K^pTHY&hCCm-sgSSu#3Er2IS4WWbc`4f!yKz4c#`Gz>42T4 zJQQtt-za7?<~n&^@zBez@oX;hH?+NL-VzG#!{D9811hmSDgh-9OnY#QL{t$;_2<)5NSyWH9w-Uz} z6`*IN`phgVHeg0*AHw)Q11KGEO{`$p(L9n8|8kBg0Wi{nFC>;(1vnx^n%a%j$BxXeY&^@#O;Aq&n4+NAw@CfImkb1i3fw-4b5ag^5`c}dxKvps zi|7C*Hu@2ciBN>J!z>HLM|2~)1CmI(c3=n?&y1Fr^I++@N8*mfKd3^x9L!{aP5d-v z3$l44Y4EHhDa6$!Ay^a><$8IB(Xfl;ue>fW5L8mX!@{6aq#$A!{Jg)p8c-1if8yIh z_GpnFnoI(^0Q{ul5As!LeE`T}S9G`dfC#V`g}RyrJ7hvZ# zg-pU(tswu_LxPlvn&>bjr0g(_YCN6&i*i79DsYS>I3z!B$2hi^?3(0N($J+wAiQ9h z)|4sX=7QThyuj1TaXhp$kdSV<>LQ*=h6t^B!3|Szv#sF-%vgkKjIyKcaTw*QgWDtBisYCcFnxi*lev zSn!-&xI`5T9av!c)HsrxoA0QqR1S6-geuTf`c*)_QlAUDDP(E(BRWCp4F;zX~>OLceq?%%_?nmC%1 zQWgvgMkCjC(dBAwU;=@BdT^nA@EO!VsteBtVEhgQJu1=YV_%vQa_O3r8U8}nW$ZHF z6+NvQT?b=~Jn)eEppLy~+Pl#6GM$!U210zA3Ur^(J15Z02%}76gK<0-$))NDL33XQ(4N_GNU7&)AjEMos{JX1u zFAU;cMw&M|oDYPJ*jdA>RPHnHk9^C2`|T4(#Ft6owGa}il)1mz`D+`H7+j`_6^bkHaeugV?8)b6QC>Q?!VDKiCEYz9 zyyq!M$q)+S_%0&){@bD!7rjRIF)Am^waCrKYVpYYWMf}w0A%?e!pZ;u0RDy4?|}>o F002v$(%b+5 literal 0 HcmV?d00001 diff --git a/.Rapp.history b/.Rapp.history new file mode 100644 index 0000000..e69de29 diff --git a/.Rhistory b/.Rhistory new file mode 100644 index 0000000..e69de29 diff --git a/To-do list Markdown.Rmd b/To-do list Markdown.Rmd 
new file mode 100644
index 0000000..9affa34
--- /dev/null
+++ b/To-do list Markdown.Rmd
@@ -0,0 +1,199 @@
+---
+title: "RMarkdown Admissions_Survey2021"
+output: html_document
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+```
+
+## R Markdown
+
+This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.
+
+When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
+
+```{r cars}
+summary(cars)
+```
+
+## Including Plots
+
+You can also embed plots, for example:
+
+```{r pressure, echo=FALSE}
+plot(pressure)
+```
+
+Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.
+
+# Upload Data
+```{r Upload Data}
+# Load only the complete participants' responses. The path is relative, so it
+# resolves against the working directory (this document's folder when knitting).
+TSR_data <- read.csv("./data/TSR data complete.csv")
+```
+
+# Basic summary visualisations (RH):
+- Q2 (respondent age)
+```{r respondent age}
+# Recode the numeric answer codes into labelled factor levels.
+# Assumes the CSV stores Q2 as codes 1-8 in this label order - TODO confirm.
+# The recode overwrites the column, so re-running this chunk without re-reading
+# the CSV turns every value into NA (the numeric codes are gone by then).
+age_labels <- c(
+  "15 or under", "16", "17", "18", "19", "20", "21 or over",
+  "Prefer not to say"
+)
+TSR_data$Age <- factor(
+  TSR_data$Age,
+  levels = seq_along(age_labels),
+  labels = age_labels
+)
+
+# pie() draws the chart and returns NULL, so there is nothing worth assigning.
+pie(table(TSR_data$Age))
+```
+- Q3 (year of study)
+```{r year of study}
+# Same recode pattern; assumes Q3 is stored as codes 1-9 - TODO confirm.
+year_labels <- c(
+  "Year 11/S4/Year 12(NI)",
+  "Year 12/S5/Year 13(NI)",
+  "Year 13/S6/Year 14(NI)",
+  "I am currently on a gap year",
+  "I am currently on an undergraduate/HE college course",
+  "I am in full-time employment",
+  "I am unemployed",
+  "Other",
+  "Prefer not to say"
+)
+TSR_data$MOST.RECENT.year.of.study <- factor(
+  TSR_data$MOST.RECENT.year.of.study,
+  levels = seq_along(year_labels),
+  labels = year_labels
+)
+
+pie(table(TSR_data$MOST.RECENT.year.of.study))
+```
+- Q16 (gender identity)
+```{r gender identity}
+# Same recode pattern; assumes Q16 is stored as codes 1-4 - TODO confirm.
+gender_labels <- c(
+  "Male",
+  "Female",
+  "I identify my gender in another way",
+  "Prefer not to say"
+)
+TSR_data$Gender <- factor(
+  TSR_data$Gender,
+  levels = seq_along(gender_labels),
+  labels = gender_labels
+)
+
+pie(table(TSR_data$Gender))
+```
+- Q17 (ethnic self-id)
+```{r ethnic self-id}
+# Same recode pattern; assumes Q17 is stored as codes 1-19 - TODO confirm.
+# NOTE(review): label 12 reads "White and Black Asian"; the neighbouring
+# labels suggest "White and Asian" was intended - confirm against the survey.
+ethnicity_labels <- c(
+  "Arab",
+  "Asian/Asian British - Indian",
+  "Asian/Asian British - Pakistani",
+  "Asian/Asian British - Bangladeshi",
+  "Asian/Asian British - Chinese",
+  "Asian/Asian British - Any other Asian background",
+  "Black/Black British - African",
+  "Black/Black British - Caribbean",
+  "Black/Black British - Any other Black background",
+  "Mixed/Multiple Ethnic Groups - White and Black Caribbean",
+  "Mixed/Multiple Ethnic Groups - White and Black African",
+  "Mixed/Multiple Ethnic Groups - White and Black Asian",
+  "Mixed/Multiple Ethnic Groups - Any other Mixed/Multiple Ethnic background",
+  "White - English/Welsh/Scottish/Northern Irish/British",
+  "White - Irish",
+  "White - Gypsy or Irish Traveller",
+  "White - Any other White background",
+  "Other Ethnic group, please describe",
+  "Prefer not to say"
+)
+TSR_data$Ethnicity <- factor(
+  TSR_data$Ethnicity,
+  levels = seq_along(ethnicity_labels),
+  labels = ethnicity_labels
+)
+
+# Still disabled: needs library(ggplot2), which this document does not load.
+# ggplot() has to be closed before `+` so that geom_bar() is added as a layer
+# rather than being passed inside the ggplot() call.
+# Ethnicity_bar <- ggplot(TSR_data, aes(x = Ethnicity)) +
+#   geom_bar(position = "stack")
+```
+- Q18 (religion)
+```{r religion}
+# Same recode pattern; assumes Q18 is stored as codes 1-19 - TODO confirm.
+religion_labels <- c(
+  "Agnostic",
+  "Atheist",
+  "Baha'i",
+  "Buddhist",
+  "Christian",
+  "Confucian",
+  "Jain",
+  "Jewish",
+  "Hindu",
+  "Indigenous Traditional Religious",
+  "Muslim",
+  "Pagan",
+  "Shinto",
+  "Sikh",
+  "Spiritual but not religious",
+  "Zoroastrian",
+  "No religion",
+  "Prefer not to say",
+  "Other"
+)
+TSR_data$Religious.Affliation <- factor(
+  TSR_data$Religious.Affliation,
+  levels = seq_along(religion_labels),
+  labels = religion_labels
+)
+
+# Still disabled: needs library(ggplot2); ggplot() is closed before `+`.
+# Religious_affiliation_bar <- ggplot(TSR_data, aes(x = Religious.Affliation)) +
+#   geom_bar(position = "stack")
+```
+
+# Visualisations of LIKERT responses (RH):
+- For questions Q6 (subject interest) / Q5 (subject knowledge) / Q7 employability prospects:
+  - visualisation as summaries for all subjects LIKERT data as stacked bar chart (colours for bar segments from cool to warm)
+
+```{r Visualization by Subject}
+# TODO: each subject is a separate column, so the columns need to be combined
+# before they can be drawn together in one graph.
+
+# Q6 Subject Interest
+
+# Q5 Subject Knowledge
+
+# Q7 Employability Prospects
+```
+  - separate visualisation of summary data as pie chart only for 4 key subjects: Philosophy, Ethics, Theology, Religious Studies, but with data represented as aggregated "Positive" / "Negative" responses
+  - subsetted visualisations of responses with separate subsetting by response to Q8-9, Q18, Q17, Q16
+- For question Q8 + Q9 (for religious people)
+  - visualisation summary of responses
+  - show subsetted visualisations of responses by response to Q18, Q17, Q16, Q13, Q14
+- For responses to Q10-12 (what subjects are involved in...):
+  - represent answer counts as descending bar chart for each Q
+  - subset answers by Q6 (positive / negative) and Q5 (positive / negative)
+
+# Correlation testing:
+- For Q6 (subject interest) / Q5 (subject knowledge) / Q7 employability prospects, test for nature / strength of correlation with responses to:
+  - Q8-9 responses
+  - Q18 responses
+  - Q17
+  - Q16
+
+
diff --git a/data/.DS_Store b/data/.DS_Store
index 8010667e047963098e4d2919e4a3edd9495ec68b..55671837599b3e3ec23de5403a165d4aec001556 100644
GIT binary patch
delta 48
zcmZoMXfc@J&&W10U^gS%ln(r86)vFmArc$j7#so#QV*
E0BHygSpWb4

delta 34
qcmZoMXfc@J&&WD4U^gS{