hacking_religion_textbook/hacking_religion/docs/intro.html

474 lines
25 KiB
HTML
Raw Normal View History

2023-10-02 10:51:32 +00:00
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
<meta charset="utf-8">
<meta name="generator" content="quarto-1.3.450">
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
<title>Hacking Religion: TRS &amp; Data Science in Action - 1&nbsp; Introduction: Hacking Religion</title>
<style>
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
div.columns{display: flex; gap: min(4vw, 1.5em);}
div.column{flex: auto; overflow-x: auto;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
ul.task-list li input[type="checkbox"] {
width: 0.8em;
margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
vertical-align: middle;
}
</style>
<script src="site_libs/quarto-nav/quarto-nav.js"></script>
<script src="site_libs/quarto-nav/headroom.min.js"></script>
<script src="site_libs/clipboard/clipboard.min.js"></script>
<script src="site_libs/quarto-search/autocomplete.umd.js"></script>
<script src="site_libs/quarto-search/fuse.min.js"></script>
<script src="site_libs/quarto-search/quarto-search.js"></script>
<meta name="quarto:offset" content="./">
<link href="./chapter_1.html" rel="next">
<link href="./index.html" rel="prev">
<script src="site_libs/quarto-html/quarto.js"></script>
<script src="site_libs/quarto-html/popper.min.js"></script>
<script src="site_libs/quarto-html/tippy.umd.min.js"></script>
<script src="site_libs/quarto-html/anchor.min.js"></script>
<link href="site_libs/quarto-html/tippy.css" rel="stylesheet">
<link href="site_libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="site_libs/bootstrap/bootstrap.min.js"></script>
<link href="site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="site_libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light">
<script id="quarto-search-options" type="application/json">{
"location": "sidebar",
"copy-button": false,
"collapse-after": 3,
"panel-placement": "start",
"type": "textbox",
"limit": 20,
"language": {
"search-no-results-text": "No results",
"search-matching-documents-text": "matching documents",
"search-copy-link-title": "Copy link to search",
"search-hide-matches-text": "Hide additional matches",
"search-more-match-text": "more match in this document",
"search-more-matches-text": "more matches in this document",
"search-clear-button-title": "Clear",
"search-detached-cancel-button-title": "Cancel",
"search-submit-button-title": "Submit",
"search-label": "Search"
}
}</script>
</head>
<body class="nav-sidebar floating">
<div id="quarto-search-results"></div>
<header id="quarto-header" class="headroom fixed-top">
<nav class="quarto-secondary-nav">
<div class="container-fluid d-flex">
<button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar,#quarto-sidebar-glass" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
<i class="bi bi-layout-text-sidebar-reverse"></i>
</button>
<nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="./intro.html"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">Introduction: Hacking Religion</span></a></li></ol></nav>
<a class="flex-grow-1" role="button" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar,#quarto-sidebar-glass" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
</a>
<button type="button" class="btn quarto-search-button" aria-label="" onclick="window.quartoOpenSearch();">
<i class="bi bi-search"></i>
</button>
</div>
</nav>
</header>
<!-- content -->
<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article">
<!-- sidebar -->
<nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal sidebar-navigation floating overflow-auto">
<div class="pt-lg-2 mt-2 text-left sidebar-header">
<div class="sidebar-title mb-0 py-0">
<a href="./">Hacking Religion: TRS &amp; Data Science in Action</a>
</div>
</div>
<div class="mt-2 flex-shrink-0 align-items-center">
<div class="sidebar-search">
<div id="quarto-search" class="" title="Search"></div>
</div>
</div>
<div class="sidebar-menu-container">
<ul class="list-unstyled mt-1">
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./index.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Preface</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./intro.html" class="sidebar-item-text sidebar-link active">
<span class="menu-text"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">Introduction: Hacking Religion</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./chapter_1.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">2</span>&nbsp; <span class="chapter-title">The 2021 UK Census</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./chapter_2.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">3</span>&nbsp; <span class="chapter-title">Survey Data: Spotlight Project</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./chapter_3.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">Mapping churches: geospatial data science</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./chapter_4.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">5</span>&nbsp; <span class="chapter-title">Data scraping, corpus analysis and wordclouds</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./summary.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">6</span>&nbsp; <span class="chapter-title">Summary</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./references.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">References</span></a>
</div>
</li>
</ul>
</div>
</nav>
<div id="quarto-sidebar-glass" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar,#quarto-sidebar-glass"></div>
<!-- margin-sidebar -->
<div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
<nav id="TOC" role="doc-toc" class="toc-active">
<h2 id="toc-title">Table of contents</h2>
<ul>
<li><a href="#who-this-book-is-for" id="toc-who-this-book-is-for" class="nav-link active" data-scroll-target="#who-this-book-is-for"><span class="header-section-number">1.1</span> Who this book is for</a></li>
<li><a href="#why-this-book" id="toc-why-this-book" class="nav-link" data-scroll-target="#why-this-book"><span class="header-section-number">1.2</span> Why this book?</a></li>
<li><a href="#the-hacker-way" id="toc-the-hacker-way" class="nav-link" data-scroll-target="#the-hacker-way"><span class="header-section-number">1.3</span> The hacker way</a></li>
<li><a href="#why-programmatic-data-science" id="toc-why-programmatic-data-science" class="nav-link" data-scroll-target="#why-programmatic-data-science"><span class="header-section-number">1.4</span> Why programmatic data science?</a></li>
<li><a href="#learning-to-code-my-way" id="toc-learning-to-code-my-way" class="nav-link" data-scroll-target="#learning-to-code-my-way"><span class="header-section-number">1.5</span> Learning to code: my way</a></li>
<li><a href="#getting-set-up" id="toc-getting-set-up" class="nav-link" data-scroll-target="#getting-set-up"><span class="header-section-number">1.6</span> Getting set up</a></li>
</ul>
</nav>
</div>
<!-- main -->
<main class="content" id="quarto-document-content">
<header id="title-block-header" class="quarto-title-block default">
<div class="quarto-title">
<h1 class="title"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">Introduction: Hacking Religion</span></h1>
</div>
<div class="quarto-title-meta">
</div>
</header>
<section id="who-this-book-is-for" class="level2" data-number="1.1">
<h2 data-number="1.1" class="anchored" data-anchor-id="who-this-book-is-for"><span class="header-section-number">1.1</span> Who this book is for</h2>
</section>
<section id="why-this-book" class="level2" data-number="1.2">
<h2 data-number="1.2" class="anchored" data-anchor-id="why-this-book"><span class="header-section-number">1.2</span> Why this book?</h2>
</section>
<section id="the-hacker-way" class="level2" data-number="1.3">
<h2 data-number="1.3" class="anchored" data-anchor-id="the-hacker-way"><span class="header-section-number">1.3</span> The hacker way</h2>
<ol type="1">
<li><p>Tell the truth</p></li>
<li><p>Do not deceive using beauty</p></li>
<li><p>Work transparently: research as open code using open data</p></li>
<li><p>Draw others in: produce reproducible research</p></li>
<li><p>Learn by doing</p></li>
</ol>
</section>
<section id="why-programmatic-data-science" class="level2" data-number="1.4">
<h2 data-number="1.4" class="anchored" data-anchor-id="why-programmatic-data-science"><span class="header-section-number">1.4</span> Why programmatic data science?</h2>
<p>This isnt just a book about data analysis, Im proposing an approach which might be thought of as research-as-code, where you write out instructions to execute the various steps of work. The upside of this is that other researchers can learn from your work, correct and build on it as part of the commons. It takes a bit more time to learn and set things up, but the upside is that youll gain access to a set of tools and a research philosophy which is much more powerful.</p>
</section>
<section id="learning-to-code-my-way" class="level2" data-number="1.5">
<h2 data-number="1.5" class="anchored" data-anchor-id="learning-to-code-my-way"><span class="header-section-number">1.5</span> Learning to code: my way</h2>
<p>This guide is a little different from other textbooks targetting learning to code. I remember when I was first starting out, I went through a fair few guides, and they all tended to spend about 200 pages on various theoretical bits, how you form an integer, or data structures, subroutines, or whatever, such that it was weeks before I got to actually <em>do</em> anything. I know some people, may prefer this approach, but I dramatically prefer a problem-focussed approach to learning. Give me something that is broken, or a problem to solve, which engages the things I want to figure out and the motivation for learning just comes much more naturally. And we know from research in cognitive science that these kinds of problem-focussed approaches can tend to faciliate faster learning and better retention, so its not just my personal preference, but also justified! It will be helpful for you to be aware of this approach when you get into the book as it explains some of the editorial choices Ive made and the way Ive structured things. Each chapter focusses on a <em>problem</em> which is particularly salient for the use of data science to conduct research into religion. That problem will be my focal point, guiding choices of specific aspects of programming to introduce to you as we work our way around that data set and some of the crucial questions that arise in terms of how we handle it. If you find this approach unsatisfying, luckily there are a number of really terrific guides which lay things out slowly and methodically and I will explicitly signpost some of these along the way so that you can do a “deep dive” when you feel like it. Otherwise, Ill take an accelerated approach to this introduction to data science in R. I expect that you will identify adjacent resources and perhaps even come up with your own creative approaches along the way, which incidentally is how real data science tends to work in practice.</p>
<p>There are a range of terrific textbooks out there which cover all these elements in greater depth and more slowly. In particular, Id recommend that many readers will want to check out Hadley Wickhams “R For Data Science” book. Ill include marginal notes in this guide pointing to sections of that book, and a few others which unpack the basic mechanics of R in more detail.</p>
</section>
<section id="getting-set-up" class="level2" data-number="1.6">
<h2 data-number="1.6" class="anchored" data-anchor-id="getting-set-up"><span class="header-section-number">1.6</span> Getting set up</h2>
<p>Every single tool, programming language and data set we refer to in this book is free and open source. These tools have been produced by professionals and volunteers who are passionate about data science and research and want to share it with the world, and in order to do this (and following the “hacker way”) theyve made these tools freely available. This also means that you arent restricted to a specific proprietary, expensive, or unavailable piece of software to do this work. Ill make a few opinionated recommendations here based on my own preferences and experience, but its really up to your own style and approach. In fact, given that this is an open source textbook, you can even propose additions to this chapter explaining other tools youve found that you want to share with others.</p>
<p>There are, right now, primarily two languages that statisticians and data scientists use for this kind of programmatic data science: python and R. Each language has its merits and I wont rehash the debates between various factions. For this book, well be using the R language. This is, in part, because the R user community and libraries tend to scale a bit better for the work that Im commending in this book. However, its entirely possible that one could use python for all these exercises, and perhaps in the future well have volume two of this book outlining python approaches to the same operations.</p>
<p>Bearing this in mind, the first step youll need to take is to download and install R. You can find instructions and install packages for a wide range of hardware on the The Comprehensive R Archive Network (or “CRAN”): https://cran.rstudio.com. Once youve installed R, youve got some choices to make about the kind of programming environment youd like to use. You can just use a plain text editor like <code>textedit</code> to write your code and then execute your programs using the R software youve just installed. However, most users, myself included, tend to use an integrated development environment (or “IDE”). This is usually another software package with a guided user interface and some visual elements that make it faster to write and test your code. Some IDE packages, will have built-in reference tools so you can look up options for libraries you use in your code, they will allow you to visualise the results of your code execution, and perhaps most important of all, will enable you to execute your programs line by line so you can spot errors more quickly (we call this “debugging”). The two most popular IDE platforms for R coding at the time of writing this textbook are RStudio and Visual Studio. You should download and try out both and stick with your favourite, as the differences are largely aesthetic. I use a combination of RStudio and an enhanced plain text editor Sublime Text for my coding.</p>
<p>Once you have R and your pick of an IDE, you are ready to go! Proceed to the next chapter and well dive right in and get started!</p>
</section>
</main> <!-- /main -->
<script id="quarto-html-after-body" type="application/javascript">
window.document.addEventListener("DOMContentLoaded", function (event) {
const toggleBodyColorMode = (bsSheetEl) => {
const mode = bsSheetEl.getAttribute("data-mode");
const bodyEl = window.document.querySelector("body");
if (mode === "dark") {
bodyEl.classList.add("quarto-dark");
bodyEl.classList.remove("quarto-light");
} else {
bodyEl.classList.add("quarto-light");
bodyEl.classList.remove("quarto-dark");
}
}
const toggleBodyColorPrimary = () => {
const bsSheetEl = window.document.querySelector("link#quarto-bootstrap");
if (bsSheetEl) {
toggleBodyColorMode(bsSheetEl);
}
}
toggleBodyColorPrimary();
const icon = "";
const anchorJS = new window.AnchorJS();
anchorJS.options = {
placement: 'right',
icon: icon
};
anchorJS.add('.anchored');
const isCodeAnnotation = (el) => {
for (const clz of el.classList) {
if (clz.startsWith('code-annotation-')) {
return true;
}
}
return false;
}
const clipboard = new window.ClipboardJS('.code-copy-button', {
text: function(trigger) {
const codeEl = trigger.previousElementSibling.cloneNode(true);
for (const childEl of codeEl.children) {
if (isCodeAnnotation(childEl)) {
childEl.remove();
}
}
return codeEl.innerText;
}
});
clipboard.on('success', function(e) {
// button target
const button = e.trigger;
// don't keep focus
button.blur();
// flash "checked"
button.classList.add('code-copy-button-checked');
var currentTitle = button.getAttribute("title");
button.setAttribute("title", "Copied!");
let tooltip;
if (window.bootstrap) {
button.setAttribute("data-bs-toggle", "tooltip");
button.setAttribute("data-bs-placement", "left");
button.setAttribute("data-bs-title", "Copied!");
tooltip = new bootstrap.Tooltip(button,
{ trigger: "manual",
customClass: "code-copy-button-tooltip",
offset: [0, -8]});
tooltip.show();
}
setTimeout(function() {
if (tooltip) {
tooltip.hide();
button.removeAttribute("data-bs-title");
button.removeAttribute("data-bs-toggle");
button.removeAttribute("data-bs-placement");
}
button.setAttribute("title", currentTitle);
button.classList.remove('code-copy-button-checked');
}, 1000);
// clear code selection
e.clearSelection();
});
function tippyHover(el, contentFn) {
const config = {
allowHTML: true,
content: contentFn,
maxWidth: 500,
delay: 100,
arrow: false,
appendTo: function(el) {
return el.parentElement;
},
interactive: true,
interactiveBorder: 10,
theme: 'quarto',
placement: 'bottom-start'
};
window.tippy(el, config);
}
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
for (var i=0; i<noterefs.length; i++) {
const ref = noterefs[i];
tippyHover(ref, function() {
// use id or data attribute instead here
let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
try { href = new URL(href).hash; } catch {}
const id = href.replace(/^#\/?/, "");
const note = window.document.getElementById(id);
return note.innerHTML;
});
}
let selectedAnnoteEl;
const selectorForAnnotation = ( cell, annotation) => {
let cellAttr = 'data-code-cell="' + cell + '"';
let lineAttr = 'data-code-annotation="' + annotation + '"';
const selector = 'span[' + cellAttr + '][' + lineAttr + ']';
return selector;
}
const selectCodeLines = (annoteEl) => {
const doc = window.document;
const targetCell = annoteEl.getAttribute("data-target-cell");
const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
const annoteSpan = window.document.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
const lines = annoteSpan.getAttribute("data-code-lines").split(",");
const lineIds = lines.map((line) => {
return targetCell + "-" + line;
})
let top = null;
let height = null;
let parent = null;
if (lineIds.length > 0) {
//compute the position of the single el (top and bottom and make a div)
const el = window.document.getElementById(lineIds[0]);
top = el.offsetTop;
height = el.offsetHeight;
parent = el.parentElement.parentElement;
if (lineIds.length > 1) {
const lastEl = window.document.getElementById(lineIds[lineIds.length - 1]);
const bottom = lastEl.offsetTop + lastEl.offsetHeight;
height = bottom - top;
}
if (top !== null && height !== null && parent !== null) {
// cook up a div (if necessary) and position it
let div = window.document.getElementById("code-annotation-line-highlight");
if (div === null) {
div = window.document.createElement("div");
div.setAttribute("id", "code-annotation-line-highlight");
div.style.position = 'absolute';
parent.appendChild(div);
}
div.style.top = top - 2 + "px";
div.style.height = height + 4 + "px";
let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter");
if (gutterDiv === null) {
gutterDiv = window.document.createElement("div");
gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter");
gutterDiv.style.position = 'absolute';
const codeCell = window.document.getElementById(targetCell);
const gutter = codeCell.querySelector('.code-annotation-gutter');
gutter.appendChild(gutterDiv);
}
gutterDiv.style.top = top - 2 + "px";
gutterDiv.style.height = height + 4 + "px";
}
selectedAnnoteEl = annoteEl;
}
};
const unselectCodeLines = () => {
const elementsIds = ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"];
elementsIds.forEach((elId) => {
const div = window.document.getElementById(elId);
if (div) {
div.remove();
}
});
selectedAnnoteEl = undefined;
};
// Attach click handler to the DT
const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
for (const annoteDlNode of annoteDls) {
annoteDlNode.addEventListener('click', (event) => {
const clickedEl = event.target;
if (clickedEl !== selectedAnnoteEl) {
unselectCodeLines();
const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
if (activeEl) {
activeEl.classList.remove('code-annotation-active');
}
selectCodeLines(clickedEl);
clickedEl.classList.add('code-annotation-active');
} else {
// Unselect the line
unselectCodeLines();
clickedEl.classList.remove('code-annotation-active');
}
});
}
const findCites = (el) => {
const parentEl = el.parentElement;
if (parentEl) {
const cites = parentEl.dataset.cites;
if (cites) {
return {
el,
cites: cites.split(' ')
};
} else {
return findCites(el.parentElement)
}
} else {
return undefined;
}
};
var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
for (var i=0; i<bibliorefs.length; i++) {
const ref = bibliorefs[i];
const citeInfo = findCites(ref);
if (citeInfo) {
tippyHover(citeInfo.el, function() {
var popup = window.document.createElement('div');
citeInfo.cites.forEach(function(cite) {
var citeDiv = window.document.createElement('div');
citeDiv.classList.add('hanging-indent');
citeDiv.classList.add('csl-entry');
var biblioDiv = window.document.getElementById('ref-' + cite);
if (biblioDiv) {
citeDiv.innerHTML = biblioDiv.innerHTML;
}
popup.appendChild(citeDiv);
});
return popup.innerHTML;
});
}
}
});
</script>
<nav class="page-navigation">
<div class="nav-page nav-page-previous">
<a href="./index.html" class="pagination-link">
<i class="bi bi-arrow-left-short"></i> <span class="nav-page-text">Preface</span>
</a>
</div>
<div class="nav-page nav-page-next">
<a href="./chapter_1.html" class="pagination-link">
<span class="nav-page-text"><span class="chapter-number">2</span>&nbsp; <span class="chapter-title">The 2021 UK Census</span></span> <i class="bi bi-arrow-right-short"></i>
</a>
</div>
</nav>
</div> <!-- /content -->
</body></html>