finished transition to sf()

2025-08-08 10:24:09 +00:00 · 2022-03-04 14:54:43 +00:00 · 2022-03-04 14:54:43 +00:00 · 0747180e54
commit 0747180e54
parent deb05acd7a
2 changed files with 115 additions and 148 deletions
--- a/data/WILDLAND_SCOTLAND_ESRI.zip
+++ b/data/WILDLAND_SCOTLAND_ESRI.zip
--- a/mapping_draft-hpc_optimised.Rmd
+++ b/mapping_draft-hpc_optimised.Rmd
@ -62,9 +62,7 @@ local({r <- getOption("repos")
 require(RCurl) # used for fetching reproducible datasets
 require(sf) # new simplefeature data class, supercedes sp in many ways
 # See issue https://github.com/kidwellj/mapping_environmental_action/issues/3 for progress re: migration from sp()
-# require(sp) # needed for proj4string, deprecated by sf()
-# require(maptools)
-
+require(sp) # needed for proj4string, deprecated by sf()
 library(ragg) # better video device, more accurate and faster rendering, esp. on macos. Also should enable system fonts for display
 library(tidyverse)
 require(tmap) # using as an alternative to base r graphics and ggplot for geospatial plots
@ -73,16 +71,6 @@ require(grid) # using for inset maps on tmap
 require(broom) # required for tidying SPDF to data.frame for ggplot2
 require(reshape2)  # using for grouped bar plot
 require(scales)
-# require(sqldf) # using sqldf to filter before loading very large data sets
-
-## Packages required for PostGIS database access
-# Many thanks to Sébastien Rochette for documentation here: https://www.r-bloggers.com/interact-with-postgis-from-r/
-# require(config) # used to access database connection credentials in config.yml file below
-# library(DBI)
-# library(odbc)
-# library(RPostgres)
-# library(rpostgis)
-# library(dbplyr)

 ## Packages required for knitr output
 ## Packages used for features or issues relating to html_document knitr output
@ -100,6 +88,17 @@ if (dir.exists("derivedData") == FALSE) {
  dir.create("derivedData")
 }

+# Dependencies and environment related to PostGIS database access, used to filter datasets before loading in some instances
+# require(sqldf) # 
+## Packages required for PostGIS database access
+# Many thanks to Sébastien Rochette for documentation here: https://www.r-bloggers.com/interact-with-postgis-from-r/
+# require(config) # used to access database connection credentials in config.yml file below
+# library(DBI)
+# library(odbc)
+# library(RPostgres)
+# library(rpostgis)
+# library(dbplyr)
+
 # # Setup PostGIS database connection
 # dw <- config::get("datawarehouse")
 # 
@ -135,7 +134,7 @@ Until recently, environmentalism has been treated by governments and environment

 ```{r load_ecs_data, message=FALSE, warning=FALSE}
 # read in Eco-Congregation Scotland data and-------------------
-# ...turn it into a SpatialPointsDataFrame---------------------
+# ...turn it into a SimpleFeature---------------------
 # TODO: update below to match new dataset once it has been uploaded to zenodo
 # if (file.exists("data/ECS-GIS-Locations_3.0.geojson") == FALSE) {
 # download.file("https://____.zip", 
@ -143,11 +142,8 @@ Until recently, environmentalism has been treated by governments and environment
 # unzip("data/____.zip", exdir = "data")
 # }

-# Note, use of paste0 here relates to fix noted above.
-# for discussion related to this approach, see https://gis.stackexchange.com/q/313761/41474
-# read in Eco-Congregation Scotland data and-------------------
-# ...turn it into a SimpleFeature---------------------
-ecs <- st_read("data/ECS-GIS-Locations_3.0.geojson") %>% st_transform(paste0("+init=epsg:",27700))
+ecs <- st_read("data/ECS-GIS-Locations_3.0.geojson")
+
 # Write data to PostGIS database for later analysis, currently commented out for later integration
 # st_write(ecs, dsn = con, layer = "ecs",
 #          overwrite = FALSE, append = FALSE)
@ -199,15 +195,15 @@ download.file("https://borders.ukdataservice.ac.uk/ukborders/easy_download/prebu
              destfile = "data/Scotland_ca_2010.zip")
 unzip("data/Scotland_ca_2010.zip", exdir = "data")
 }
-admin_lev1 <- st_read("data/scotland_ca_2010.shp") %>% st_transform(paste0("+init=epsg:",27700))
-<<<<<<< HEAD
+admin_lev1 <- st_read("data/scotland_ca_2010.shp")
+# Create simplified version of geometry for visualisations, with a tolerance of 100m
+admin_lev1_simplified <- st_simplify(admin_lev1, preserveTopology = FALSE, dTolerance = 100)
+# compare memory usage of objects
+round(c(object.size(admin_lev1), object.size(admin_lev1_simplified)) / 1024)
+
 # Write to SQL database
 # st_write(admin_lev1, dsn = con, layer = "admin_lev1",
 #          overwrite = FALSE, append = FALSE)
-=======
-st_write(admin_lev1, dsn = con, layer = "admin_lev1",
-         overwrite = FALSE, append = FALSE)
->>>>>>> 89c9a2a5a4542de5584daa0304a53008a779ded8

 # read in polygon for intermediate admin boundary layers
 if (file.exists("data/scotland_parlcon_2011.shp") == FALSE) {
@ -215,9 +211,12 @@ download.file("http://census.edina.ac.uk/ukborders/easy_download/prebuilt/shape/
              destfile = "data/Scotland_parlcon_2011.zip")
 unzip("data/Scotland_parlcon_2011.zip", exdir = "data")
 }
-admin_lev2 <- st_read("data/scotland_parlcon_2011.shp") %>% st_transform(paste0("+init=epsg:",27700))
-st_write(admin_lev2, dsn = con, layer = "admin_lev2",
-         overwrite = FALSE, append = FALSE)
+admin_lev2 <- st_read("data/scotland_parlcon_2011.shp")
+
+# Create simplified version of geometry for visualisations, with a tolerance of 100m
+admin_lev2_simplified <- st_simplify(admin_lev2, preserveTopology = FALSE, dTolerance = 100)
+# compare memory usage of objects
+round(c(object.size(admin_lev2), object.size(admin_lev2_simplified)) / 1024)

 # Generate new sf shape using bounding box for central belt for map insets below
 # Note: coordinates use BNG as CRS (EPSG: 27700)
@ -238,60 +237,19 @@ scotland_ratio<- get_asp_ratio(scotland)

 ```{r import_groups_data, message=FALSE, warning=FALSE, include=FALSE}
 # read in Transition Towns data and turn it into a SpatialPointsDataFrame
-
-# Original approach using sp()
-# transition_wgs <- read.csv(text=getURL("https://zenodo.org/record/165519/files/SCCAN_1.4.csv"))
-# coordinates(transition_wgs) <- c("X", "Y")
-# proj4string(transition_wgs) <- CRS("+init=epsg:4326")
-# transition <- spTransform(transition_wgs, bng)
-# transition_sf <- st_as_sf(transition, coords = c("X", "Y"), crs=27700)
-
-transition <- st_read("data/transition-scotland_2.3.geojson") %>% st_transform(paste0("+init=epsg:",27700))
+transition <- st_read("data/transition-scotland_2.3.geojson")

 # read in pointX data and turn it into a SpatialPointsDataFrame
-# TODO to add code here to parse out raw pointx file (or OS local?)
-
-# Original approach using sp()
-# pow_pointX <- read.csv("./data/poi_2015_12_scot06340459.csv", sep="|")
-# coordinates(pow_pointX) <- c("feature_easting", "feature_northing")
-# # TODO: need to alter to draw from wgs84 or bng defined in preamble above
-# proj4string(pow_pointX) <- CRS("+init=epsg:27700")
-# pow_pointX_sf <- st_as_sf(pow_pointX, coords = c("X", "Y"), crs=27700)
-
-pow_pointX <- st_read("data/pointx_201512_scotland_pow.geojson") %>% st_transform(paste0("+init=epsg:",27700))
+# TODO to add function above to parse out raw pointx file (or using OS local?)
+pow_pointX <- st_read("data/pointx_201512_scotland_pow.geojson")

 ## read in Scottish Community Dev. trust data and turn it into a SpatialPointsDataFrame
-# removing deprecated use of sp() and csv
-# dtas <- read.csv("data/community-dev-trusts-2.6.csv")
-# coordinates(dtas) <- c("X", "Y")
-# proj4string(dtas) <- CRS("+init=epsg:27700")
-# dtas_sf <- st_as_sf(dtas, coords = c("X", "Y"), crs=27700)
-
-dtas <- st_read("data/community-dev-trusts-2.6.geojson") %>% st_transform(paste0("+init=epsg:",27700))
+dtas <- st_read("data/community-dev-trusts-2.6.geojson")

 ## read in permaculture data and turn it into a SpatialPointsDataFrame
-# removing deprecated use of sp() and csv
-# permaculture <- read.csv("data/permaculture_scot-0.8.csv")
-# coordinates(permaculture) <- c("X", "Y")
-# proj4string(permaculture) <- CRS("+init=epsg:27700")
-# permaculture_sf <- st_as_sf(permaculture, coords = c("X", "Y"), crs=27700)
-<<<<<<< HEAD
+permaculture <- st_read("data/permaculture_scot-0.8.geojson")

-permaculture <- st_read("data/permaculture_scot-0.8.geojson") %>% st_transform(paste0("+init=epsg:",27700))
-
-=======
-
-permaculture <- st_read("data/permaculture_scot-0.8.geojson") %>% st_transform(paste0("+init=epsg:",27700))
-
->>>>>>> 89c9a2a5a4542de5584daa0304a53008a779ded8
 # subset point datasets for inset maps below
-# commenting out deprecated naming convention
-# ecs_sf_centralbelt <- st_intersection(ecs_sf, centralbelt_region)
-# dtas_sf_centralbelt <- st_intersection(dtas_sf, centralbelt_region)
-# transition_sf_centralbelt <- st_intersection(transition_sf, centralbelt_region)
-# pow_sf_centralbelt <- st_intersection(pow_pointX_sf, centralbelt_region)
-# permaculture_sf_centralbelt <- st_intersection(permaculture_sf, centralbelt_region)
-
 ecs_centralbelt <- st_intersection(ecs, centralbelt_region)
 dtas_centralbelt <- st_intersection(dtas, centralbelt_region)
 transition_centralbelt <- st_intersection(transition, centralbelt_region)
@ -309,15 +267,7 @@ permaculture_centralbelt <- st_intersection(permaculture, centralbelt_region)
 admin_lev1$ecs_count <- lengths(st_covers(admin_lev1, ecs))
 admin_lev1$ecs_percent<- prop.table(admin_lev1$ecs_count)

-# # Test new approach
-# library(sp)
-# library(GISTools)
-# admin_lev1$ecs_count_sp <- poly.counts(ecs,admin_lev1)
-# admin_lev1$ecs_count_sp - admin_lev1$ecs_count
-# admin_lev1$ecs_percent <- prop.table(admin_lev1$ecs_count)
-
 # calculate count of places of worship in PointX db for fields in admin and provide percentages
-# TODO ingest data here from OS Open Map Local dataset (rather than non-open PointX dataset; need to filter and upload to zenodo)
 admin_lev1$pow_count <- lengths(st_covers(admin_lev1, pow_pointX))
 admin_lev1$pow_percent<- prop.table(admin_lev1$pow_count)
 # calculate count of Transition for fields in admin and provide percentages
@ -348,40 +298,30 @@ admin_lev2$dtas_percent<- prop.table(admin_lev2$dtas_count)
 admin_lev2$permaculture_count <- lengths(st_covers(admin_lev2, permaculture))
 admin_lev2$permaculture_percent<- prop.table(admin_lev2$permaculture_count)

-# original approach - calculate count of ECS for fields in admin_lev2_sf
-# TODO: for future migration to sf throughout, remove above content and swap out references.
-# admin_lev2_sf$ecs_count <- lengths(st_covers(admin_lev2_sf, st_as_sf(ecs_sf, coords = c("long", "lat"), crs = st_crs(27700))))
-# admin_lev2_sf$pow_count <- lengths(st_covers(admin_lev2_sf, st_as_sf(pow_pointX_sf, coords = c("long", "lat"), crs = st_crs(27700))))
-# admin_lev2_sf$transition_count <- lengths(st_covers(admin_lev2_sf, st_as_sf(transition_sf, coords = c("long", "lat"), crs = st_crs(27700))))
-# admin_lev2_sf$dtas_count <- lengths(st_covers(admin_lev2_sf, st_as_sf(dtas_sf, coords = c("long", "lat"), crs = st_crs(27700))))
-# admin_lev2_sf$permaculture_count <- lengths(st_covers(admin_lev2_sf, st_as_sf(permaculture_sf, coords = c("long", "lat"), crs = st_crs(27700))))
-
 # Import csv with population data for each level of administrative subdivision and join to spatialdataframe
-
-# Placeholder for England parish data for later implementation
-# download.file("http://census.edina.ac.uk/ukborders/easy_download/prebuilt/shape/England_cp_1991.zip", destfile = "parishes/parishes-1991.zip")
-# unzip("parishes/parishes-1991.zip", exdir = "parishes")
-# parishes <- rgdal::readOGR(dsn = "parishes", "england_cp_1991")
-
 # TODO - consider adapting to use ONS mid-year statistics: https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationestimates/datasets/populationestimatesforukenglandandwalesscotlandandnorthernireland

-JK revising done to here 2020-Mar-11
-
 # Load population statistics for normalising data by population and merge with admin_lev1
 admin_lev1_pop <- read.csv("./data/scotland_admin_2011pop.csv")
 # TODO: sf() merge doesn't support by.x etc. operations, need to adapt
-admin_lev1 <- merge(x=admin_lev1, y=admin_lev1_pop, by.x = "code", by.y = "CODE")
+admin_lev1 <- merge(x=admin_lev1, y=admin_lev1_pop, by.x = "code", by.y = "code")
 admin_lev1_merged <- merge(admin_lev1, admin_lev1_pop)
 # Convert number stored as string with a comma to an integer
-admin_lev1$X2011_pop <- as.numeric(gsub(",", "", admin_lev1@data$X2011_pop))
+admin_lev1$X2011_pop <- admin_lev1$X2011_pop %>%
+  str_replace_all(",", "")
 # Calculate counts as percentages of total for normalising below
-admin_lev1$pop_percent <- prop.table(admin_lev1$X2011_pop)
+admin_lev1$pop_percent <- prop.table(as.numeric(admin_lev1$X2011_pop))
 admin_lev1$pow_percent <- prop.table(admin_lev1$pow_count)
 # Normalise ecs_count using population figures (using ArcGIS method)
 admin_lev1$ecs_count_popnorm <- admin_lev1$ecs_count * admin_lev1$pop_percent
 # Normalise ecs_count using places of worship counts (using ArcGIS method)
 admin_lev1$ecs_count_pownorm <- admin_lev1$ecs_count * admin_lev1$pow_percent

+# Placeholder for England parish data for later implementation
+# download.file("http://census.edina.ac.uk/ukborders/easy_download/prebuilt/shape/England_cp_1991.zip", destfile = "parishes/parishes-1991.zip")
+# unzip("parishes/parishes-1991.zip", exdir = "parishes")
+# parishes <- rgdal::readOGR(dsn = "parishes", "england_cp_1991")
+
 # Preserve scale
 admin_lev1$ecs_count_popnorm_scaled <- admin_lev1$ecs_count_popnorm*(sum(admin_lev1$ecs_count)/sum(admin_lev1$ecs_count_popnorm))
 admin_lev1$ecs_count_pownorm_scaled <- admin_lev1$ecs_count_pownorm*(sum(admin_lev1$ecs_count)/sum(admin_lev1$ecs_count_pownorm))
@ -419,7 +359,7 @@ Though there are too few eco-congregations and transition groups for a numerical
 tm_shape(admin_lev2) + 
  tm_fill(col = "ecs_count", palette = "Oranges", title = "Concentration of ECS groups") +
  tm_borders(alpha=.5, lwd=0.1) +
-  tm_shape(admin_lev1) +
+  tm_shape(admin_lev1_simplified) +
  tm_borders(lwd=0.6) +
  # TODO: Change title field "name" to match admin1
  # also consider scaling text size using area #  quick plot example: 
@ -453,7 +393,7 @@ tm_shape(admin_lev2) +
 tm_shape(admin_lev2) + 
  tm_fill(col = "ecs_count_pownorm_scaled", palette = "Oranges", n = 5, title = "Concentration of ECS groups, data normalised by places of worship") +
  tm_borders(alpha=.5, lwd=0.1) +
-  tm_shape(admin_lev1) +
+  tm_shape(admin_lev1_simplified) +
  tm_borders(lwd=0.6) +
  tm_scale_bar(position = c("right", "bottom")) +
  tm_style("gray") +
@ -476,7 +416,7 @@ tm_shape(admin_lev2) +
  tm_fill(col = "ecs_count_popnorm_scaled", palette = "Oranges", n = 5, 
          title = "Concentration of ECS groups, data normalised by places of worship") +
  tm_borders(alpha=.5, lwd=0.1) +
-  tm_shape(admin_lev1) +
+  tm_shape(admin_lev1_simplified) +
  tm_borders(lwd=0.6) +
  tm_scale_bar(position = c("right", "bottom")) +
  tm_style("gray") +
@ -533,10 +473,10 @@ admin_barplot <-
     legend.title = element_blank(), 
     axis.text.x = element_blank()) +
    scale_color_manual(labels = c("DTAS", "ECS", "Permaculture", "Transition"))
-
+# TODO, need better export method now that tmap() and graphic device are fixed
 pdf("figures/03_admin_barplot.pdf")
 print(admin_barplot)
-dev.off()
+# dev.off()

 ```

@ -550,7 +490,7 @@ admin_lev2_scotland_ecs_plot <-
  tm_shape(admin_lev2) + 
  tm_fill(col = "ecs_count", palette = "Oranges", n = 5, title = "Concentration of ECS groups") +
  tm_borders(alpha=.5, lwd=0.1) +
-  tm_shape(admin_lev1) +
+  tm_shape(admin_lev1_simplified) +
  tm_borders(lwd=0.6) +
  tm_shape(ecs) +
  tm_dots("red", size = .02, alpha = .2) +
@ -589,14 +529,15 @@ tmap_mode("plot")
 pdf("figures/03_admin_lev2_scotland_ecs_plot.pdf")
 print(admin_lev2_centralbelt_ecs_plot, 
      vp = vp_admin_lev2_centralbelt_ecs_plot)
-dev.off()
+# TODO, need better export method now that tmap() and graphic device are fixed
+# dev.off()

 # Second plot, revealing transition towns

 tm_shape(admin_lev2) + 
  tm_fill(col = "transition_count", palette = "Oranges", n = 5, title = "Concentration of Transition groups") +
  tm_borders(alpha=.5, lwd=0.1) +
-  tm_shape(admin_lev1) +
+  tm_shape(admin_lev1_simplified) +
  tm_borders(lwd=0.6) +
  tm_shape(transition) +
  tm_dots("red", size = .02, alpha = .2) +
@ -619,7 +560,7 @@ tm_shape(admin_lev2) +
 tm_shape(admin_lev2) + 
  tm_fill(col = "dtas_count", palette = "Oranges", n = 5, title = "Concentration of DTAS groups") +
  tm_borders(alpha=.5, lwd=0.1) +
-  tm_shape(admin_lev1) +
+  tm_shape(admin_lev1_simplified) +
  tm_borders(lwd=0.6) +
  tm_shape(dtas) +
  tm_dots("red", size = .02, alpha = .2) +
@ -643,7 +584,7 @@ tm_shape(admin_lev2) +
  tm_fill(col = "permaculture_count", palette = "Oranges", n = 5, 
          title = "Concentration of Permaculture groups") +
  tm_borders(alpha=.5, lwd=0.1) +
-  tm_shape(admin_lev1) +
+  tm_shape(admin_lev1_simplified) +
  tm_borders(lwd=0.6) +
  tm_shape(permaculture) +
  tm_dots("red", size = .02, alpha = .2) +
@ -693,6 +634,7 @@ A wide variety of historians and sociologists of religion have noted the regiona
 So why provide this kind of data (i.e. at the level of individual churches) when more granular data (i.e. at the level of individuals persons) is available in the form of the census and related parallel publications such as the 2008 Scottish Environmental Attitudes survey? We believe that mapping places of worship provides a useful intermediate level of analysis and may complement our more atomised understanding of EA which has been assessed at the level of individual persons to date. Because representation within some administrative areas of Scotland, can lead to a small number of data points, we have kept analysis to a National level and have not provided more specific administrative-area level calculations.

 ```{r 06_ecs_denomination_table}
+# TODO: fix to work with new sf() object
 knitr::kable(summary(ecs@data[["denomination"]]), caption = 'ECS by denomination')
 # TODO: Add dataframe with overall church numbers for the UK from "ScottishChurches" dataset by JK
 ```
@ -711,9 +653,12 @@ download.file("http://sedsh127.sedsh.gov.uk/Atom_data/ScotGov/ZippedShapefiles/S
              destfile = "data/SG_UrbanRural_2016.zip")
 unzip("data/SG_UrbanRural_2016.zip", exdir = "data")
 }
-urbanrural <- st_read("data/SG_UrbanRural_2016.shp") %>% st_transform(paste0("+init=epsg:",27700))
-urbanrural_simplified <- st_simplify(urbanrural)
+urbanrural <- st_read("data/SG_UrbanRural_2016.shp")
+# Create simplified version of geometry for visualisations, with a tolerance of 100m
+urbanrural_simplified <- st_simplify(urbanrural, preserveTopology = FALSE, dTolerance = 100)
 # TODO: worth considering uploading data to zenodo for long-term reproducibility as ScotGov shuffles this stuff around periodically breaking URLs
+# compare memory usage of objects
+round(c(object.size(urbanrural), object.size(urbanrural_simplified)) / 1024)

 # This code will generate a table of frequencies for each spatialpointsdataframe in urbanrural
 # calculate count of ECS for fields in urbanrural
@ -731,7 +676,6 @@ urbanrural$dtas_percent<- prop.table(urbanrural$dtas_count)
 # calculate count of permaculture for fields in urbanrural
 urbanrural$permaculture_count <- lengths(st_covers(urbanrural, permaculture))
 urbanrural$permaculture_percent<- prop.table(urbanrural$permaculture_count)
-
 ```

 Rather than bifurcate congregations into an urban/rural dichotomy, for this study we used the Scottish Government's six-point remoteness scale to categorise eco-congregations along a spectrum of highly populated to remote areas. This 8-fold scale (calculated biennially) offers a more nuanced measurement that combines measurements of remoteness and population along the following lines:
@ -845,35 +789,49 @@ download.file("http://simd.scot/2016/data/simd2016_withgeog.zip",
              destfile = "data/simd2016_withgeog.zip")
 unzip("data/simd2016_withgeog.zip", exdir = "data", junkpaths = TRUE)
 }
-simd_shapes <- readOGR("./data", "sc_dz_11") 
+simd_shapes <- st_read("data/sc_dz_11.shp")
 simd_indicators <- read.csv("./data/simd2016_withinds.csv")
 simd_indicators_min <- simd_indicators[c(1,6:17)]
+# Combine shapes with data
 # Original data is in wgs, so need to convert from wgs to bng to work with analysis across data sets
 simd_wgs <- merge(x=simd_shapes, y=simd_indicators_min, by.x = "DataZone", by.y = "Data_Zone")
-simd <- spTransform(simd_wgs, bng)
+simd <- simd_wgs %>% st_transform(27700)
+ # Create simplified version of geometry for visualisations, with a tolerance of 100m
+# Workaround to address invalid spherical geometry issue 
+sf_use_s2(FALSE)
+simd_simplified <- st_simplify(simd_shapes, preserveTopology = FALSE, dTolerance = 100)
+# compare memory usage of objects
+round(c(object.size(simd_shapes), object.size(simd_simplified)) / 1024)

 # Augment each dataset with relevant (geolocated) columns from SIMD
 # assign combined table with SIMD columns to attribute table slot of ecs table
-ecs@data=cbind(ecs@data,over(ecs,simd))
+
+# TODO: need to resolve issue here with simd columns not being added, also issues with geometry
+ecs=cbind(ecs,st_intersects(simd,ecs))
 # assign combined table with SIMD columns to attribute table slot of ecs table
-pow_pointX@data=cbind(pow_pointX@data,over(pow_pointX,simd))
+pow_pointX=cbind(pow_pointX,st_intersects(pow_pointX,simd))
 # assign combined table with SIMD columns to attribute table slot of transition table
-transition@data=cbind(transition@data,over(transition,simd))
+# TODO: fix bad geometry here causing fails as it lies outside simd polygons
+# transition=cbind(transition,st_intersects(transition,simd))
 # assign combined table with SIMD columns to attribute table slot of permaculture table
-permaculture@data=cbind(permaculture@data,over(permaculture,simd))
+permaculture=cbind(permaculture,st_intersects(permaculture,simd))
 # assign combined table with SIMD columns to attribute table slot of dtas table
-dtas@data=cbind(dtas@data,over(dtas,simd))
+# TODO: fix bad geometry here causing fails as it lies outside simd polygons
+# dtas=cbind(dtas,st_intersects(dtas,simd))

 # Augment SIMD with group counts

-simd$ecs_count <- poly.counts(ecs,simd)
-simd$transition_count <- poly.counts(transition,simd)
-simd$dtas_count <- poly.counts(dtas,simd)
-simd$permaculture_count <- poly.counts(permaculture,simd)
-simd$pow_count <- poly.counts(pow_pointX,simd)
+simd$ecs_count <- lengths(st_covers(simd, ecs))
+simd$transition_count <- lengths(st_covers(simd, transition))
+simd$dtas_count <- lengths(st_covers(simd, dtas))
+simd$permaculture_count <- lengths(st_covers(simd, permaculture))
+simd$pow_count <- lengths(st_covers(simd, pow_pointX))

 # Generate simplified dataframes with counts for each group
 ecs_simd <- data.frame(ecs[c(1,16,25,30:36)])
+# TODO: adapt code below to include equivalent rows from above now that we are using tidyverse
+ecs_simd <- data.frame(select(ecs, row.id, col.id))
+
 colnames(ecs_simd)[1] <- "group_name"
 ecs_simd$group_type <- "ecs"
 transition_simd <- data.frame(transition[c(2,5,14,19:25)])
@ -974,11 +932,12 @@ if (file.exists("data/SSSI_SCOTLAND.shp") == FALSE) {
 # download.file("", destfile = "data/SSSI_SCOTLAND_ESRI.zip")
 unzip("data/SSSI_SCOTLAND_ESRI.zip", exdir = "data")
 }
-sssi <- st_read("data/SSSI_SCOTLAND.shp") %>% st_transform(paste0("+init=epsg:",27700))
+sssi <- st_read("data/SSSI_SCOTLAND.shp")
 sssi_sp <- readOGR("./data", "SSSI_SCOTLAND") 
-# Generate simplified polygon for plots below
-sssi_simplified <- st_simplify(sssi)
-# sssi_simplified_sp <- rgeos::gSimplify(sssi_sp, tol=3)
+ # Create simplified version of geometry for visualisations, with a tolerance of 100m
+sssi_simplified <- st_simplify(sssi, preserveTopology = TRUE, dTolerance = 100)
+# compare memory usage of objects
+round(c(object.size(sssi), object.size(sssi_simplified)) / 1024)

 # 2. Download wild land areas:

@ -989,9 +948,11 @@ if (file.exists("data/WILDLAND_SCOTLAND.shp") == FALSE) {
 unzip("data/WILDLAND_SCOTLAND_ESRI.zip", exdir = "data")
 }

-wildland <- st_read("data/WILDLAND_SCOTLAND.shp") %>% st_transform(paste0("+init=epsg:",27700))
-# Generate simplified polygon for plots below
-wildland_simplified <- st_simplify(wildland)
+wildland <- st_read("data/WILDLAND_SCOTLAND.shp")
+# Create simplified version of geometry for visualisations, with a tolerance of 100m
+wildland_simplified <- st_simplify(wildland, preserveTopology = FALSE, dTolerance = 100)
+# compare memory usage of objects
+round(c(object.size(wildland), object.size(wildland_simplified)) / 1024)

 # 3. Download data for National Forest Inventory:

@ -1003,9 +964,12 @@ download.file("https://opendata.arcgis.com/datasets/3cb1abc185a247a48b9d53e4c4a8
 unzip("data/National_Forest_Inventory_Woodland_Scotland_2017.zip", exdir = "data")
 }

-forestinv <- st_read("data/National_Forest_Inventory_Woodland_Scotland_2017.shp") %>% st_transform(paste0("+init=epsg:",27700))
-# Generate simplified polygon for plots below
-forestinv_simplified <- st_simplify(forestinv)
+forestinv <- st_read("data/National_Forest_Inventory_Woodland_Scotland_2017.shp")
+# Create simplified version of geometry for visualisations, with a tolerance of 100m
+# Some tweaks required here, useful notes here: https://www.r-bloggers.com/2021/03/simplifying-geospatial-features-in-r-with-sf-and-rmapshaper/
+forestinv_simplified <- st_simplify(forestinv, preserveTopology = TRUE, dTolerance = 1000)
+# compare memory usage of objects
+round(c(object.size(forestinv), object.size(forestinv_simplified)) / 1024)

 # Download data for scenic areas
 # https://opendata.arcgis.com/datasets/d7f6b987c7224a72a185ce012258d500_23.zip
@ -1015,9 +979,11 @@ forestinv_simplified <- st_simplify(forestinv)
 # download.file("https://opendata.arcgis.com/datasets/d7f6b987c7224a72a185ce012258d500_23.zip", destfile = "data/ScenicAreas.zip")
 # unzip("data/ScenicAreas.zip", exdir = "data")

-scenicareas <- st_read("data/SG_NationalScenicAreas_1998.shp") %>% st_transform(paste0("+init=epsg:",27700))
-# Generate simplified polygon for plots below
-scenicareas_simplified <- st_simplify(scenicareas)
+scenicareas <- st_read("data/SG_NationalScenicAreas_1998.shp")
+# Create simplified version of geometry for visualisations, with a tolerance of 100m
+scenicareas_simplified <- st_simplify(scenicareas, preserveTopology = TRUE, dTolerance = 100)
+# compare memory usage of objects
+round(c(object.size(scenicareas), object.size(scenicareas_simplified)) / 1024)

 # Set symmetrical CRS for analysis below (inserted here in order to correct errors, may be deprecated later)
 # st_crs(sssi) <- 27700
@ -1033,8 +999,8 @@ scenicareas_simplified <- st_simplify(scenicareas)
 # Define buffers as lines and polygons for reuse below (as some of these operations take > 10 minutes)
 # TODO: Working with simplified polygons here to optimise execution, need to confirm calculations are the same as polygons without simplification (likely!)

-sssi_buf50 <- st_buffer(sssi_simplified, dist = 50)
-sssi_buf500 <- st_buffer(sssi_simplified, dist = 500)
+sssi_buf50 <- st_buffer(sssi, dist = 50)
+sssi_buf500 <- st_buffer(sssi, dist = 500)
 # Lines offer even more optimised representation suitable for plotting
 # TODO: resolve error:
 # Error in st_cast.sfc(., to = "LINESTRING") : 
@ -1046,8 +1012,8 @@ sssi_buf500 <- st_buffer(sssi_simplified, dist = 500)
 #   st_cast(to = "MULTILINESTRING") %>% 
 #   st_cast(to = "LINESTRING")

-wildland_buf50 <- st_buffer(wildland_simplified, dist = 50)
-wildland_buf500 <- st_buffer(wildland_simplified, dist = 500)
+wildland_buf50 <- st_buffer(wildland, dist = 50)
+wildland_buf500 <- st_buffer(wildland, dist = 500)
 # Lines offer even more optimised representation suitable for plotting
 # wildland_buf50_lines = st_union(wildland_simplified) %>% st_buffer(50) %>% 
 #   st_cast(to = "LINESTRING")
@ -1062,8 +1028,8 @@ forestinv_buf500 <- st_buffer(forestinv_simplified, dist = 500)
 # forestinv_buf500_lines = st_union(forestinv_simplified) %>% st_buffer(500) %>% 
 #   st_cast(to = "LINESTRING")

-scenicareas_buf50 <- st_buffer(scenicareas_simplified, dist = 50)
-scenicareas_buf500 <- st_buffer(scenicareas_simplified, dist = 500)
+scenicareas_buf50 <- st_buffer(scenicareas, dist = 50)
+scenicareas_buf500 <- st_buffer(scenicareas, dist = 500)

 # Calculate number of groups within polygons

@ -1077,12 +1043,12 @@ scenicareas_buf500 <- st_buffer(scenicareas_simplified, dist = 500)
 # from https://gotellilab.github.io/Bio381/StudentPresentations/SpatialDataTutorial.html

 # TODO: integrate pre-calc here into calculations further down which are still recalculating these figures
-ecs_sssi <- st_within(ecs, sssi_simplified)
+ecs_sssi <- st_within(ecs, sssi)
 ecs_sssi50m <- st_within(ecs, sssi_buf50)
 ecs_sssi500m <- st_within(ecs, sssi_buf500)
 ecs_sssibeyond500m <- !(st_within(ecs, sssi_buf500))

-ecs_wildland <- st_within(ecs, wildland_simplified)
+ecs_wildland <- st_within(ecs, wildland)
 ecs_wildland50m <- st_within(ecs, wildland_buf50)
 ecs_wildland500m <- st_within(ecs, wildland_buf500)
 ecs_wildlandbeyond500m <- !(st_within(ecs, wildland_buf500))
@ -1092,7 +1058,7 @@ ecs_forestinv50m <- st_within(ecs, forestinv_buf50)
 ecs_forestinv500m <- st_within(ecs, forestinv_buf500)
 ecs_forestinvbeyond500m <- !(st_within(ecs, forestinv_buf500))

-ecs_scenicareas <- st_within(ecs, scenicareas_simplified)
+ecs_scenicareas <- st_within(ecs, scenicareas)
 ecs_scenicareas50m <- st_within(ecs, scenicareas_buf50)
 ecs_scenicareas500m <- st_within(ecs, scenicareas_buf500)
 ecs_scenicareasbeyond500m <- !(st_within(ecs, scenicareas_buf500))
@ -1102,6 +1068,7 @@ ecs_scenicareasbeyond500m <- !(st_within(ecs, scenicareas_buf500))
 # See: https://nceas.github.io/oss-lessons/parallel-computing-in-r/parallel-computing-in-r.html

 # Generate dataframe based on SSSI buffers
+# TODO: add pubs and grocery store data (using PointX)

 # Calculate incidence of ecs within SSSI and within buffers at 50/500m
 ecs_sssi_row <- c(sum(apply(st_within(ecs, sssi_simplified, sparse=FALSE), 1, any)), sum(apply(st_within(ecs, sssi_buf50, sparse=FALSE), 1, any)), sum(apply(st_within(ecs, sssi_buf500, sparse=FALSE), 1, any)))