fresh render of ch1

This commit is contained in:
Jeremy Kidwell 2024-02-13 14:28:18 +00:00
parent ec2f7d0406
commit 89aa257f9e
23 changed files with 755 additions and 308 deletions

File diff suppressed because it is too large Load diff

Binary file not shown.

Before

Width:  |  Height:  |  Size: 32 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 52 KiB

After

Width:  |  Height:  |  Size: 32 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 52 KiB

After

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 58 KiB

After

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 60 KiB

After

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 62 KiB

After

Width:  |  Height:  |  Size: 60 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 65 KiB

After

Width:  |  Height:  |  Size: 62 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 62 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 65 KiB

After

Width:  |  Height:  |  Size: 71 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 71 KiB

After

Width:  |  Height:  |  Size: 63 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 81 KiB

After

Width:  |  Height:  |  Size: 59 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 100 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 159 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 71 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 81 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 100 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 23 KiB

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

After

Width:  |  Height:  |  Size: 23 KiB

View file

Before

Width:  |  Height:  |  Size: 26 KiB

After

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

View file

@ -2,7 +2,7 @@
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head> <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
<meta charset="utf-8"> <meta charset="utf-8">
<meta name="generator" content="quarto-1.3.450"> <meta name="generator" content="quarto-1.4.549">
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes"> <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
@ -48,7 +48,13 @@ ul.task-list li input[type="checkbox"] {
"collapse-after": 3, "collapse-after": 3,
"panel-placement": "start", "panel-placement": "start",
"type": "textbox", "type": "textbox",
"limit": 20, "limit": 50,
"keyboard-shortcut": [
"f",
"/",
"s"
],
"show-item-context": false,
"language": { "language": {
"search-no-results-text": "No results", "search-no-results-text": "No results",
"search-matching-documents-text": "matching documents", "search-matching-documents-text": "matching documents",
@ -57,6 +63,7 @@ ul.task-list li input[type="checkbox"] {
"search-more-match-text": "more match in this document", "search-more-match-text": "more match in this document",
"search-more-matches-text": "more matches in this document", "search-more-matches-text": "more matches in this document",
"search-clear-button-title": "Clear", "search-clear-button-title": "Clear",
"search-text-placeholder": "",
"search-detached-cancel-button-title": "Cancel", "search-detached-cancel-button-title": "Cancel",
"search-submit-button-title": "Submit", "search-submit-button-title": "Submit",
"search-label": "Search" "search-label": "Search"
@ -72,11 +79,11 @@ ul.task-list li input[type="checkbox"] {
<header id="quarto-header" class="headroom fixed-top"> <header id="quarto-header" class="headroom fixed-top">
<nav class="quarto-secondary-nav"> <nav class="quarto-secondary-nav">
<div class="container-fluid d-flex"> <div class="container-fluid d-flex">
<button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar,#quarto-sidebar-glass" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }"> <button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
<i class="bi bi-layout-text-sidebar-reverse"></i> <i class="bi bi-layout-text-sidebar-reverse"></i>
</button> </button>
<nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="./index.html">Introduction: Hacking Religion</a></li></ol></nav> <nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="./index.html">Introduction: Hacking Religion</a></li></ol></nav>
<a class="flex-grow-1" role="button" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar,#quarto-sidebar-glass" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }"> <a class="flex-grow-1" role="button" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
</a> </a>
<button type="button" class="btn quarto-search-button" aria-label="" onclick="window.quartoOpenSearch();"> <button type="button" class="btn quarto-search-button" aria-label="" onclick="window.quartoOpenSearch();">
<i class="bi bi-search"></i> <i class="bi bi-search"></i>
@ -87,7 +94,7 @@ ul.task-list li input[type="checkbox"] {
<!-- content --> <!-- content -->
<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article"> <div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article">
<!-- sidebar --> <!-- sidebar -->
<nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal sidebar-navigation floating overflow-auto"> <nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal quarto-sidebar-collapse-item sidebar-navigation floating overflow-auto">
<div class="pt-lg-2 mt-2 text-left sidebar-header"> <div class="pt-lg-2 mt-2 text-left sidebar-header">
<div class="sidebar-title mb-0 py-0"> <div class="sidebar-title mb-0 py-0">
<a href="./">Hacking Religion: TRS &amp; Data Science in Action</a> <a href="./">Hacking Religion: TRS &amp; Data Science in Action</a>
@ -109,7 +116,7 @@ ul.task-list li input[type="checkbox"] {
<li class="sidebar-item"> <li class="sidebar-item">
<div class="sidebar-item-container"> <div class="sidebar-item-container">
<a href="./chapter_1.html" class="sidebar-item-text sidebar-link"> <a href="./chapter_1.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">The 2021 UK Census</span></span></a> <span class="menu-text"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">A note if you are reading raw code here rather than the book. You can ignore the code provided here below, as this is just intended to set up the basic workspace for our future work and is also necessary for the <code>quarto</code> application we use to build this book. Quarto is an application which blends together text and blocks of code. The text begins below at “The 2021 UK Census</span></span></a>
</div> </div>
</li> </li>
<li class="sidebar-item"> <li class="sidebar-item">
@ -151,7 +158,7 @@ ul.task-list li input[type="checkbox"] {
</ul> </ul>
</div> </div>
</nav> </nav>
<div id="quarto-sidebar-glass" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar,#quarto-sidebar-glass"></div> <div id="quarto-sidebar-glass" class="quarto-sidebar-collapse-item" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item"></div>
<!-- margin-sidebar --> <!-- margin-sidebar -->
<div id="quarto-margin-sidebar" class="sidebar margin-sidebar"> <div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
<nav id="TOC" role="doc-toc" class="toc-active"> <nav id="TOC" role="doc-toc" class="toc-active">
@ -198,40 +205,43 @@ ul.task-list li input[type="checkbox"] {
</div> </div>
</header> </header>
<section id="introduction-hacking-religion" class="level1 unnumbered"> <section id="introduction-hacking-religion" class="level1 unnumbered">
<h1 class="unnumbered">Introduction: Hacking Religion</h1> <h1 class="unnumbered">Introduction: Hacking Religion</h1>
<section id="why-this-book" class="level2"> <section id="why-this-book" class="level2">
<h2 class="anchored" data-anchor-id="why-this-book">Why this book?</h2> <h2 class="anchored" data-anchor-id="why-this-book">Why this book?</h2>
<p>Data science is quickly consolidating as a new field, with new tools and user communities emerging seemingly every week! At the same time the field of academic research has opened up into new interdisciplinary vistas, with experts crossing over into new fields, transgressing disciplinary boundaries and deploying tools in new and unexpected ways to develop knowledge. There are many gaps yet to be filled, but one which I found to be particularly glaring is the lack of applied data science documentation around the subject of religion. On one hand, scholars who are working with cutting edge theory seldom pick up the emerging tools of data science. On the other hand, data scientists rarely go beyond dabbling in religious themes. This book aims to bring these two things together: introducing the tools of data science in an applied way, whilst introducing some of the complexities and cutting edge theories which help us to conceptualise and frame our understanding of this knowledge.</p> <p>Data science is quickly consolidating as a new field, with new tools and user communities emerging seemingly every week. At the same time the field of academic research has opened up into new interdisciplinary vistas, with experts crossing over into new fields, transgressing disciplinary boundaries and deploying tools in new and unexpected ways to develop knowledge. There are many gaps yet to be filled, but one which I found to be particularly glaring is the lack of applied data science documentation around the subject of religion. On one hand, scholars who are working with cutting edge theory seldom pick up these emerging tools of data science. On the other hand, data scientists rarely go beyond dabbling in religious themes, leaving quite a lot of really interesting theoretical research untouched. 
This book aims to bring these two things together: introducing the tools of data science in an applied way, whilst introducing some of the complexities and cutting edge theories which help us to conceptualise and frame our understanding of this knowledge regarding religion in the world around us.</p>
</section> </section>
<section id="the-hacker-way" class="level2"> <section id="the-hacker-way" class="level2">
<h2 class="anchored" data-anchor-id="the-hacker-way">The hacker way</h2> <h2 class="anchored" data-anchor-id="the-hacker-way">The hacker way</h2>
<p>Its worth emphasising at the outset that this isnt meant to be a generic data science book. My own training as a researcher lies in the field of religious ethics, and my engagement with digital technology has, from the very start, been a context for exploring matters of personal values, and social action. A fair bit of ink has been spilled in books, magazines, blogs, zines, and tweets unpacking what exactly it means to be a “hacker”. Pressing beyond some of the more superficial cultural stereotypes, I want to explain a bit here about how hacking can be a much more substantial vision for ethical engagement with technology and social transformation.</p> <p>Its worth emphasising at the outset that this isnt meant to be a generic data science book. My own training as a researcher lies in the field of religious ethics, and my engagement with digital technology has, from the very start, been a context for exploring matters of personal values and social action. A fair bit of ink has been spilled in books, magazines, blogs and zines unpacking what exactly it means to be a “hacker”. Pressing beyond some of the more superficial cultural stereotypes, I want to explain a bit here about how hacking can be a much more substantial vision for ethical engagement with technology and social transformation.</p>
<p>Back in the 1980s Steven Levy tried to capture some of this in his book “Hackers: Heroes of the Computer Revolution”. As Levy put it, the “hacker ethic” included: (1) sharing, (2) openness, (3) decentralisation, (4) free access to computers and (5) world improvement. The key point here is that hacking isnt just about writing and breaking code, or testing and finding weaknesses in computer systems and networks. It can be a more substantial ethical code.</p> <p>Back in the 1980s Steven Levy tried to capture some of this in his book “Hackers: Heroes of the Computer Revolution”. As Levy put it, the “hacker ethic” included: (1) sharing, (2) openness, (3) decentralisation, (4) free access to computers and (5) world improvement. The key point here is that hacking isnt just about writing and breaking code, or testing and finding weaknesses in computer systems and networks. There is often a more substantial underpinning ethical code which dovetails with on-the-surface matters of curiosity and craft.</p>
<p>This emphasis on ethics is especially important when were doing data science because this kind of research work will put you in positions of influence and grant you power over others. You might think this seems a bit overstated, but it never ceases to amaze me how much bringing a bar chart which succinctly shows some sort of social trend can sway a conversation or decision making process. There is something unusually persuasive that comes with the combination of aesthetics, data and storytelling. Ive met many people who have come to data science out of a desire to bring about social transformation in some sphere of life. People want to use technology and communication to make the world better. However, its possible that this can quickly get out of hand. Its important to have a clear sense of what sorts of convictions guide your work in this field, a “hacker code” of sorts. With this in mind, Id like to share with you my own set of principles:</p> <p>This emphasis on ethics is especially important when were doing data science because this kind of research work will put you in positions of influence and bestow upon you a certain amount of social influence. You might think this seems a bit overstated, but it never ceases to amaze me how much bringing a bar chart which succinctly shows a social trend can sway a conversation or decision making process. There is something unusually persuasive that comes with the combination of aesthetics, data and storytelling. Ive met many people who have come to data science out of a desire to bring about social transformation in some sphere of life. People want to use technology and communication to make the world better. However, its possible that this can quickly get out of hand. With this in mind, Ive found that it can be important to have a clear sense of what sorts of convictions guide your work in this field: a “hacker code” of sorts. Here are the principles that I have settled on in my own practice of hacking religion:</p>
<p>It never ceases to amaze me how often people think that, when theyre working for something they think is important it is acceptable to conceal bad news or amplify good or compelling information beyond its real scope. There are always consequences, eventually. When people realise youve been misleading or manipulating them your platform and credibility will evaporate. Good work mixed with bad will all get tossed out. And sometimes, our convictions can lead us beyond our true apprehension of a situation.</p>
<p>Presenting through “facts” an argument can become unnaturally compelling. Wrapping those facts up in something that uses colour, line and shape in a way that is aesthetically pleasing, even beautiful, enhances this allure even further. As you take up the hacker way, its vitally important that you always strive to tell the truth. This includes a willingness to acknowledge the limits of your information, and to share the whole set of information. The easiest way to do this is to work with visualation in a responsible way (Ill get into this a bit more in Chapter 1) and to open up your data and code to scrutiny. By allowing others to try, criticise, edit, and reappropriate your code and data in their own ways, you contribute to knowledge help to build up a community of accountability. The upside of this is that its also a lot more fun and interesting to work alongside others.</p>
<p>Far too often, scholarly research (and theology) has been criticised for being disconnected from reality, making abstract pie-in-the-sky claims about how life should be lived. When exposed to the uncomfortable pressures of reality, these claims can crumble, or even turn sinister. One of the upsides of working with empirical research is that you have a chance to engage with the real world. For this reason, I love to do ethics in a way that arises - bottom-up - from real world experiences and relationships. Theres also the potential that when we make choices based on reliable information drawn from everyday reality like this our policy and culture can be more resilient and accountable. This also works well with the hacker ethos of “learning by doing” and its this approach that guides my approach in this book. This isnt just a book about data analysis, Im proposing an approach which might be thought of as research-as-code, where you write out instructions to execute the various steps of work. The upside of this is that other researchers can learn from your work, correct and build on it as part of the commons. It takes a bit more time to learn and set things up, but the upside is that youll gain access to a set of tools and a research philosophy which is much more powerful.</p>
<p>Heres a quick summary of these principles, which Ill return to periodically as we work through the coding and data in this book:</p>
<ol type="1"> <ol type="1">
<li>Tell the truth: Be candid about your limits, use visualisation responsibly</li> <li>Tell the truth: be candid about your limits, use visualisation responsibly</li>
<li>Work transparently: Open data, open code</li> <li>Work transparently: open data, open code</li>
<li>Work in community, draw others in by producing reproducible research</li> <li>Work in community: draw others in by producing reproducible research</li>
<li>Work with reality, learn by doing</li> <li>Work with reality and learn by doing</li>
</ol> </ol>
<p>It never ceases to amaze me how often people think that, when they’re working for something they think is important, it is acceptable to conceal bad news or amplify good or compelling information beyond its real scope. There are always consequences, eventually. When people realise you’ve been misleading or manipulating them, your platform and credibility will evaporate. Good work mixed with bad will all get tossed out. And sometimes, our convictions can lead us beyond our true apprehension of a situation.</p>
<p>Presented through “facts”, an argument can become unnaturally compelling. Wrapping those facts up in something that uses colour, line and shape in a way that is aesthetically pleasing, even beautiful, enhances this allure even further. As you craft your own set of hacker principles, it’s vitally important that you always strive to tell the truth. This includes a willingness to acknowledge the limits of your information, and to share the whole set of information. The easiest way to do this is to work with visualisation in a responsible way (I’ll get into this a bit more in Chapter 1) and to open up your data and code to scrutiny. By allowing others to try, criticise, edit, and reappropriate your code and data in their own ways, you contribute to knowledge and help to build up a community of accountability. The upside of this is that it’s also a lot more fun and interesting to work alongside others.</p>
<p>Far too often, scholarly research (and theology) has been criticised for being disconnected from reality, making abstract pie-in-the-sky claims about how life should be lived. When exposed to the uncomfortable pressures of reality, these claims can crumble, or even turn sinister. One of the upsides of working with empirical research is that you have a chance to engage with the real world. For this reason, I love to do ethics in a way that arises - bottom-up - from real world experiences and relationships. There’s also the potential (at least in the best case scenario) that when we make choices based on reliable information drawn from everyday reality our policy and culture can be more resilient and accountable. This also works well with the hacker ethos of “learning by doing” and it’s this ethos that guides my approach in this book. This isn’t just a book about data analysis: I’m proposing an approach which might be thought of as research-as-code, where you write out instructions to execute the various steps of work. The upside of this is that other researchers can learn from your work, correct and build on it as part of an intellectual commons. It takes a bit more time to learn and set things up, but the upside is that you’ll gain access to a set of tools and a research philosophy which is much more powerful.</p>
<p>I’ll return to these principles periodically as we work through the coding and data in this book.</p>
</section> </section>
<section id="learning-to-code-my-way" class="level2"> <section id="learning-to-code-my-way" class="level2">
<h2 class="anchored" data-anchor-id="learning-to-code-my-way">Learning to code: my way</h2> <h2 class="anchored" data-anchor-id="learning-to-code-my-way">Learning to code: my way</h2>
<p>This guide is a little different from other textbooks targetting learning to code. I remember when I was first starting out, I went through a fair few guides, and they all tended to spend about 200 pages on various theoretical bits, how you form an integer, or data structures, subroutines, the logical structure of algorithms or whatever. It was usuallyweeks of reading before I got to actually <em>do</em> anything. I know some people may prefer this approach, but I prefer a problem-focussed approach to learning. Give me something that is broken, or a problem to solve, which engages the things I want to figure out and the motivation for learning just comes much more naturally. And we know from research in cognitive science that these kinds of problem-focussed approaches can tend to faciliate faster learning and better retention, so its not just my personal preference, but also justified! It will be helpful for you to be aware of this approach when you get into the book as it explains some of the editorial choices Ive made and the way Ive structured things. Each chapter focusses on a <em>problem</em> which is particularly salient for the use of data science to conduct research into religion. That problem will be my focal point, guiding choices of specific aspects of programming to introduce to you as we work our way around that data set and some of the crucial questions that arise in terms of how we handle it. If you find this approach unsatisfying, luckily there are a number of really terrific guides which lay things out slowly and methodically and I will explicitly signpost some of these along the way so that you can do a “deep dive” when you feel like it. Otherwise, Ill take an accelerated approach to this introduction to data science in R. 
I expect that you will identify adjacent resources and perhaps even come up with your own creative approaches along the way, which incidentally is how real data science tends to work in practice.</p> <p>Alongside these guiding principles, its also worth saying a bit about how I like to design teaching and learning. Some readers may notice that this guide is a little different from other textbooks targetting learning to code. I remember when I was first starting out, I went through a fair few guides, and they all tended to spend about 200 pages on various theoretical bits, how you form an integer, or data structures, subroutines, the logical structure of algorithms etc. etc. It was usually weeks of reading before I got to actually <em>do</em> anything. I know some people may prefer this approach, but I prefer a problem-focussed approach to learning. Give me something that is broken, or a problem to solve, which engages the things I want to figure out and the motivation for learning just comes much more naturally. And we know from research in cognitive science that these kinds of problem-focussed approaches can tend to faciliate faster learning and better retention, so its not just my personal preference, but also justified! It will be helpful for you to be aware of this approach when you get into the book as it explains some of the editorial choices Ive made and the way Ive structured things.</p>
<p>There are a range of terrific textbooks out there which cover all these elements in greater depth and more slowly. In particular, Id recommend that many readers will want to check out Hadley Wickhams “R For Data Science” book. Ill include marginal notes in this guide pointing to sections of that book, and a few others which unpack the basic mechanics of R in more detail.</p> <p>Each chapter focusses on a series of <em>problems</em> which are particularly salient for the use of data science to conduct research into religion. These problems will be my focal point, guiding choices of specific aspects of programming to introduce to you as we work our way around that data set and some of the crucial questions that arise in terms of how we handle it. If you find this approach unsatisfying, luckily there are a number of really terrific guides which lay things out slowly and methodically and I will explicitly signpost some of these along the way so that you can do a “deep dive” when you feel like it. You can also find a list of resources in Appendix B to this book. Otherwise, Ill take an accelerated approach to this introduction to data science in R. I expect that you will identify adjacent resources and perhaps even come up with your own creative approaches along the way, which incidentally is how real data science tends to work in practice.</p>
<p>There is a range of terrific textbooks out there which cover all these elements in greater depth and more slowly. In particular, I’d recommend that readers check out Hadley Wickham’s “<a href="https://r4ds.hadley.nz/">R For Data Science</a>” book. I’ll include marginal notes in this guide pointing to sections of that book, and a few others which unpack the basic mechanics of R in more detail.</p>
</section> </section>
<section id="getting-set-up" class="level2"> <section id="getting-set-up" class="level2">
<h2 class="anchored" data-anchor-id="getting-set-up">Getting set up</h2> <h2 class="anchored" data-anchor-id="getting-set-up">Getting set up</h2>
<p>Every single tool, programming language and data set we refer to in this book is free and open source. These tools have been produced by professionals and volunteers who are passionate about data science and research and want to share it with the world, and in order to do this (and following the “hacker way”) theyve made these tools freely available. This also means that you arent restricted to a specific proprietary, expensive, or unavailable piece of software to do this work. Ill make a few opinionated recommendations here based on my own preferences and experience, but its really up to your own style and approach. In fact, given that this is an open source textbook, you can even propose additions to this chapter explaining other tools youve found that you want to share with others.</p> <p>Every single tool, programming language and data set we refer to in this book is free and open source. These tools have been produced by professionals and volunteers who are passionate about data science and research and want to share it with the world, and in order to do this (and following the “hacker way”) theyve made these tools freely available. This also means that you arent restricted to a specific proprietary, expensive, or unavailable piece of software to do this work. Ill make a few opinionated recommendations here based on my own preferences and experience, but its really up to your own style and approach. In fact, given that this is an open source textbook, you can even propose additions to this chapter explaining other tools youve found that you want to share with others.</p>
<p>There are, right now, primarily two languages that statisticians and data scientists use for this kind of programmatic data science: python and R. Each language has its merits and I wont rehash the debates between various factions. For this book, well be using the R language. This is, in part, because the R user community and libraries tend to scale a bit better for the work that Im commending in this book. However, its entirely possible that one could use python for all these exercises, and perhaps in the future well have volume two of this book outlining python approaches to the same operations.</p> <p>There are, right now, primarily two languages that statisticians and data scientists use for this kind of programmatic data science: python and R. Each language has its merits and I wont rehash the debates between various factions. For this book, well be using the R language. This is, in part, because the R user community and libraries tend to scale a bit better for the work that Im commending in this book. However, its entirely possible that one could use python for all these exercises, and perhaps in the future well have volume two of this book outlining python approaches to the same operations.</p>
<p>Bearing this in mind, the first step youll need to take is to download and install R. You can find instructions and install packages for a wide range of hardware on the The Comprehensive R Archive Network (or “CRAN”): https://cran.rstudio.com. Once youve installed R, youve got some choices to make about the kind of programming environment youd like to use. You can just use a plain text editor like <code>textedit</code> to write your code and then execute your programs using the R software youve just installed. However, most users, myself included, tend to use an integrated development environment (or “IDE”). This is usually another software package with a guided user interface and some visual elements that make it faster to write and test your code. Some IDE packages, will have built-in reference tools so you can look up options for libraries you use in your code, they will allow you to visualise the results of your code execution, and perhaps most important of all, will enable you to execute your programs line by line so you can spot errors more quickly (we call this “debugging”). The two most popular IDE platforms for R coding at the time of writing this textbook are RStudio and Visual Studio. You should download and try out both and stick with your favourite, as the differences are largely aesthetic. I use a combination of RStudio and an enhanced plain text editor Sublime Text for my coding.</p> <p>Bearing this in mind, the first step youll need to take is to download and install R. You can find instructions and install packages for a wide range of hardware on a key resource online for R programmers: the The Comprehensive R Archive Network (or “CRAN”): https://cran.rstudio.com. Once youve installed R, youve got some choices to make about the kind of programming environment youd like to use. You can just use a plain text editor like <code>textedit</code> to write your code and then execute your programs using the R software youve just installed. 
However, most users, myself included, tend to use an integrated development environment (or “IDE”). This is usually another software package with a graphical user interface and some visual elements that make it faster to write and test your code. Some IDE packages will have built-in reference tools so you can look up options for libraries you use in your code; they will allow you to visualise the results of your code execution and, perhaps most important of all, will enable you to execute your programs line by line so you can spot errors more quickly (we call this “debugging”). The two most popular IDE platforms for R coding at the time of writing this textbook are RStudio and Visual Studio. You should download and try out both and stick with your favourite, as the differences are largely aesthetic. I use a combination of RStudio and an enhanced plain text editor, Sublime Text, for my coding.</p>
<p>Once you have R and your pick of an IDE, you are ready to go! Proceed to the next chapter and well dive right in and get started!</p> <p>Once you have R and your pick of an IDE, you are ready to go! Proceed to the next chapter and well dive right in and get started!</p>
@ -318,10 +328,9 @@ window.document.addEventListener("DOMContentLoaded", function (event) {
// clear code selection // clear code selection
e.clearSelection(); e.clearSelection();
}); });
function tippyHover(el, contentFn) { function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
const config = { const config = {
allowHTML: true, allowHTML: true,
content: contentFn,
maxWidth: 500, maxWidth: 500,
delay: 100, delay: 100,
arrow: false, arrow: false,
@ -331,8 +340,17 @@ window.document.addEventListener("DOMContentLoaded", function (event) {
interactive: true, interactive: true,
interactiveBorder: 10, interactiveBorder: 10,
theme: 'quarto', theme: 'quarto',
placement: 'bottom-start' placement: 'bottom-start',
}; };
if (contentFn) {
config.content = contentFn;
}
if (onTriggerFn) {
config.onTrigger = onTriggerFn;
}
if (onUntriggerFn) {
config.onUntrigger = onUntriggerFn;
}
window.tippy(el, config); window.tippy(el, config);
} }
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]'); const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
@ -346,6 +364,125 @@ window.document.addEventListener("DOMContentLoaded", function (event) {
const note = window.document.getElementById(id); const note = window.document.getElementById(id);
return note.innerHTML; return note.innerHTML;
}); });
}
const xrefs = window.document.querySelectorAll('a.quarto-xref');
/**
 * Build the HTML shown in a cross-reference hover preview.
 *
 * @param {string|null} id - Target element id; `null` (or an id starting
 *   with `sec-`) selects the abbreviated "section" rendering.
 * @param {Element} note - The target element. It is modified in place
 *   (classes stripped, anchor link removed), so callers pass a clone or a
 *   node from a separately parsed document.
 * @returns {string} HTML markup for the preview.
 */
const processXRef = (id, note) => {
  // Strip column container classes from the node and all of its descendants.
  const queue = [note];
  while (queue.length > 0) {
    const current = queue.pop();
    current.classList.remove("page-full", "page-columns");
    if (current.children) {
      for (const descendant of current.children) {
        queue.push(descendant);
      }
    }
  }

  // Typeset math in `target` when the Quarto helper is available.
  const typesetIfAvailable = (target) => {
    if (window.Quarto?.typesetMath) {
      window.Quarto.typesetMath(target);
    }
  };

  const isSection = id === null || id.startsWith('sec-');
  if (!isSection) {
    // Remove any anchor links if they are present
    note.querySelector('a.anchorjs-link')?.remove();
    typesetIfAvailable(note);
    // TODO in 1.5, we should make sure this works without a callout special case
    return note.classList.contains("callout") ? note.outerHTML : note.innerHTML;
  }

  // Special case sections, only their first couple elements
  const preview = document.createElement("div");
  const hasManyChildren = note.children && note.children.length > 2;
  if (!hasManyChildren) {
    typesetIfAvailable(note);
    return note.innerHTML;
  }

  // Keep the first child, then the first later child that is not an empty <p>.
  preview.appendChild(note.children[0].cloneNode(true));
  const firstContent = Array.from(note.children)
    .slice(1)
    .find((candidate) => !(candidate.tagName === "P" && candidate.innerText === ""));
  if (firstContent !== undefined) {
    preview.appendChild(firstContent.cloneNode(true));
  }
  typesetIfAvailable(preview);
  return preview.innerHTML;
};
// Attach a hover preview to every cross-reference link. No static content is
// passed to tippyHover; the content is resolved in the trigger callback.
for (var i=0; i<xrefs.length; i++) {
const xref = xrefs[i];
tippyHover(xref, undefined, function(instance) {
// Disable the instance while content is being resolved; every path below
// re-enables and shows it from a finally handler.
instance.disable();
let url = xref.getAttribute('href');
let hash = undefined;
if (url.startsWith('#')) {
hash = url;
} else {
// If URL parsing throws, hash stays undefined and the no-hash branch runs.
try { hash = new URL(url).hash; } catch {}
}
if (hash) {
// Drop the leading '#' (and an optional '/') to obtain the element id.
const id = hash.replace(/^#\/?/, "");
const note = window.document.getElementById(id);
if (note !== null) {
// Target is in the current document: process a deep clone of it.
try {
const html = processXRef(id, note.cloneNode(true));
instance.setContent(html);
} finally {
instance.enable();
instance.show();
}
} else {
// See if we can fetch this
// (requests the URL without its fragment and looks the id up in the parsed document)
fetch(url.split('#')[0])
.then(res => res.text())
.then(html => {
const parser = new DOMParser();
const htmlDoc = parser.parseFromString(html, "text/html");
const note = htmlDoc.getElementById(id);
if (note !== null) {
const html = processXRef(id, note);
instance.setContent(html);
}
}).finally(() => {
instance.enable();
instance.show();
});
}
} else {
// See if we can fetch a full url (with no hash to target)
// This is a special case and we should probably do some content thinning / targeting
fetch(url)
.then(res => res.text())
.then(html => {
const parser = new DOMParser();
const htmlDoc = parser.parseFromString(html, "text/html");
const note = htmlDoc.querySelector('main.content');
if (note !== null) {
// This should only happen for chapter cross references
// (since there is no id in the URL)
// remove the first header
if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
note.children[0].remove();
}
// A null id selects processXRef's abbreviated section rendering.
const html = processXRef(null, note);
instance.setContent(html);
}
}).finally(() => {
instance.enable();
instance.show();
});
}
// The untrigger callback below is empty.
}, function(instance) {
});
} }
let selectedAnnoteEl; let selectedAnnoteEl;
const selectorForAnnotation = ( cell, annotation) => { const selectorForAnnotation = ( cell, annotation) => {
@ -388,6 +525,7 @@ window.document.addEventListener("DOMContentLoaded", function (event) {
} }
div.style.top = top - 2 + "px"; div.style.top = top - 2 + "px";
div.style.height = height + 4 + "px"; div.style.height = height + 4 + "px";
div.style.left = 0;
let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter"); let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter");
if (gutterDiv === null) { if (gutterDiv === null) {
gutterDiv = window.document.createElement("div"); gutterDiv = window.document.createElement("div");
@ -413,6 +551,32 @@ window.document.addEventListener("DOMContentLoaded", function (event) {
}); });
selectedAnnoteEl = undefined; selectedAnnoteEl = undefined;
}; };
// Handle positioning of the toggle
// On resize, clear the stored element rect and re-select the code lines of
// the currently selected annotation (if any). The handler is wrapped in
// throttle with a 10 ms interval.
const repositionSelectedAnnotation = () => {
  elRect = undefined;
  if (selectedAnnoteEl) {
    selectCodeLines(selectedAnnoteEl);
  }
};
window.addEventListener("resize", throttle(repositionSelectedAnnotation, 10));
/**
 * Wrap `fn` so the first call runs immediately and later calls are deferred.
 *
 * After the first call, each further call cancels any pending timer and
 * schedules `fn` to run after `ms` milliseconds with that call's arguments.
 * When a scheduled run fires, the wrapper resets, so the next call runs
 * immediately again.
 *
 * @param {Function} fn - Function to invoke.
 * @param {number} ms - Delay in milliseconds for deferred invocations.
 * @returns {Function} The wrapped function; it returns nothing.
 */
function throttle(fn, ms) {
  let engaged = false;
  let pending;
  const invoke = (callArgs) => fn.apply(this, callArgs);

  return (...args) => {
    if (engaged) {
      // Already engaged: replace any pending run with one for this call.
      if (pending) {
        clearTimeout(pending);
      }
      pending = setTimeout(() => {
        invoke(args);
        engaged = false;
        pending = false;
      }, ms);
      return;
    }
    // First call gets through immediately.
    invoke(args);
    engaged = true;
  };
}
// Attach click handler to the DT // Attach click handler to the DT
const annoteDls = window.document.querySelectorAll('dt[data-target-cell]'); const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
for (const annoteDlNode of annoteDls) { for (const annoteDlNode of annoteDls) {
@ -476,8 +640,8 @@ window.document.addEventListener("DOMContentLoaded", function (event) {
<div class="nav-page nav-page-previous"> <div class="nav-page nav-page-previous">
</div> </div>
<div class="nav-page nav-page-next"> <div class="nav-page nav-page-next">
<a href="./chapter_1.html" class="pagination-link"> <a href="./chapter_1.html" class="pagination-link" aria-label="<span class='chapter-number'>1</span>&nbsp; <span class='chapter-title'>A note if you are reading raw code here rather than the book. You can ignore the code provided here below, as this is just intended to set up the basic workspace for our future work and is also necessary for the `quarto` application we use to build this book. Quarto is an application which blends together text and blocks of code. The text begins below at &quot;The 2021 UK Census&quot;</span>">
<span class="nav-page-text"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">The 2021 UK Census</span></span> <i class="bi bi-arrow-right-short"></i> <span class="nav-page-text"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">A note if you are reading raw code here rather than the book. You can ignore the code provided here below, as this is just intended to set up the basic workspace for our future work and is also necessary for the <code>quarto</code> application we use to build this book. Quarto is an application which blends together text and blocks of code. The text begins below at “The 2021 UK Census</span></span> <i class="bi bi-arrow-right-short"></i>
</a> </a>
</div> </div>
</nav> </nav>
@ -485,4 +649,5 @@ window.document.addEventListener("DOMContentLoaded", function (event) {
</body></html> </body></html>