Word cloud generated using R, based on data scraped from Scottish Government Climate Challenge Fund applications.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

33 lines
1.1 KiB

  # Script to create a word cloud from Climate Challenge Fund (CCF)
  # applications. Every file found under "CCF/" is read as one document of
  # the corpus; the cloud is drawn on the active graphics device.

  # PRELIMINARIES: load libraries ----
  # library() stops right away when a package is missing, whereas require()
  # only returns FALSE and lets the script fail later with an unrelated error.
  library(tm)
  # brewer.pal() used below comes from RColorBrewer, which wordcloud attaches
  # as one of its dependencies.
  library(wordcloud)
  library(stringr)

  # Load and clean the data set ----
  # Ask for a VCorpus explicitly: Corpus() picks a different corpus class
  # depending on the installed tm version, and the transformations below are
  # written for documents held as PlainTextDocument objects.
  ccf <- VCorpus(DirSource("CCF/"))

  # tolower() is a plain character function. content_transformer() makes it
  # return a document instead of a bare character vector, so no later
  # tm_map(ccf, PlainTextDocument) repair step is needed.
  ccf <- tm_map(ccf, content_transformer(tolower))

  # Tweak by adding extra stopwords: "will" and "project".
  myStopwords <- c(stopwords("english"), "will", "project")
  # Tweak by removing a default stopword so it is kept, in this example "r":
  # myStopwords <- setdiff(myStopwords, "r")

  # Stopwords are removed BEFORE punctuation: the English list contains
  # contractions such as "don't" and "i'm", which would no longer match once
  # the apostrophes have been stripped from the text.
  ccf <- tm_map(ccf, removeWords, myStopwords)
  # ccf <- tm_map(ccf, stemDocument)
  ccf <- tm_map(ccf, removeNumbers)
  ccf <- tm_map(ccf, removePunctuation)
  # Collapse whitespace last, after the removals above have left gaps.
  ccf <- tm_map(ccf, stripWhitespace)

  # Create word cloud ----
  # Alternative layouts kept for reference:
  # wordcloud(ccf, scale = c(5, 0.5), max.words = 100, random.order = FALSE,
  #           rot.per = 0.35, use.r.layout = FALSE,
  #           colors = brewer.pal(8, "Dark2"))
  # wordcloud(ccf, scale = c(8, 0.2), min.freq = 2, max.words = Inf,
  #           random.order = FALSE, rot.per = 0.15,
  #           colors = brewer.pal(8, "Dark2"))
  wordcloud(
    ccf,
    scale = c(5, 0.3),
    max.words = 250,
    random.order = FALSE,
    rot.per = 0.35,
    use.r.layout = FALSE,
    # "Dark3" is not an RColorBrewer palette and makes brewer.pal() fail;
    # "Dark2" is the 8-colour qualitative palette used by the variants above.
    colors = brewer.pal(8, "Dark2")
  )