diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..07fe41c --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +# GitHub syntax highlighting +pixi.lock linguist-language=YAML linguist-generated=true diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml new file mode 100644 index 0000000..a529de4 --- /dev/null +++ b/.github/workflows/pr.yml @@ -0,0 +1,37 @@ +name: Pull Request Checks + +on: + pull_request: + branches: [main] + +jobs: + build-check: + runs-on: ubuntu-latest + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Set up Quarto + uses: quarto-dev/quarto-actions/setup@v2 + + - name: Install R + uses: r-lib/actions/setup-r@v2 + with: + r-version: '4.4.1' + use-public-rspm: true + + - name: Install R Dependencies + uses: r-lib/actions/setup-r-dependencies@v2 + with: + cache-version: 2 + + - name: Additional Setup + run: | + # Install Python: + sudo apt-get update + sudo apt-get install python3 + sudo apt-get install python3-pip + python3 -m pip install jupyter jupyter-cache + + - name: Build Quarto Project + run: quarto render \ No newline at end of file diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 0000000..80f109b --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,44 @@ +on: + workflow_dispatch: + push: + branches: main + +name: Quarto Publish + +jobs: + build-deploy: + runs-on: ubuntu-latest + permissions: + contents: write + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Set up Quarto + uses: quarto-dev/quarto-actions/setup@v2 + + - name: Install R + uses: r-lib/actions/setup-r@v2 + with: + r-version: '4.4.1' + use-public-rspm: true + + - name: Install R Dependencies + uses: r-lib/actions/setup-r-dependencies@v2 + with: + cache-version: 2 + + - name: Additional Setup + run: | + # Install Python: + sudo apt-get update + sudo apt-get install python3 + sudo apt-get install python3-pip + python3 -m pip install jupyter jupyter-cache + + - name: Render and Publish + uses: quarto-dev/quarto-actions/publish@v2 + with: + target: gh-pages + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file
diff --git a/.gitignore b/.gitignore index 586bb91..b4e12fb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,37 +1,10 @@ -.Rproj.user -.Rhistory -.RData -.Ruserdata -*.html -_book -codeExamples* -data* -slides/libs/ -slides/*files* -*.pdf -*.geojson -*.gpkg -*gpx* +docs + +/.quarto/ +_freeze/ *.csv -*.xls -*.zip -*.Rds -TDS -slides/references.bib -*.tex -aq.* -airquality.r -courses/2day/OTP/graphs/default/Graph.obj -courses/2day/OTP/graphs/default/IOW_DEM.tif -courses/2day/OTP/graphs/default/isle-of-wight.osm.pbf -courses/2day/OTP/graphs/default/router-config.json -courses/2day/OTP/otp.jar -practicals/OTP/graphs/default/IOW_DEM.tif -practicals/OTP/graphs/default/isle-of-wight.osm.pbf -practicals/OTP/graphs/default/router-config.json -practicals/OTP/otp.jar -practicals/OTP/graphs/default/Graph.obj -lucene/ -*.docx -practicals/otp_parallel_log.txt -otp_TDS +*.ics + +# pixi environments +.pixi +*.egg-info diff --git a/2024-overview.qmd b/2024-overview.qmd deleted file mode 100644 index 496c5b3..0000000 --- a/2024-overview.qmd +++ /dev/null @@ -1,11 +0,0 @@ ---- -format: gfm ---- - - -# Session 1 - -https://github.com/ITSLeeds/TDS/blob/master/practicals/2-software.md - -# Session 2 - diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..5a54cfa --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,28 @@ +Package: TDStests +Title: What the Package Does (One Line, Title Case) +Version: 0.0.0.9000 +Authors@R: + person("First", "Last", , "first.last@example.com", role = c("aut", "cre"), + comment = c(ORCID = "YOUR-ORCID-ID")) +Description: What the package does (one paragraph). 
+License: `use_mit_license()`, `use_gpl3_license()` or friends to pick a + license +Encoding: UTF-8 +Roxygen: list(markdown = TRUE) +RoxygenNote: 7.3.2 +Imports: + tidyverse, + sf, + quarto, + stats19, + nycflights13, + remotes, + spDataLarge, + DT, + calendar, + reticulate, + stplanr, + tmap, + spData +Remotes: + nowosad/spDataLarge diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..0e259d4 --- /dev/null +++ b/LICENSE @@ -0,0 +1,121 @@ +Creative Commons Legal Code + +CC0 1.0 Universal + + CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE + LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN + ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS + INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES + REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS + PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM + THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED + HEREUNDER. + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator +and subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for +the purpose of contributing to a commons of creative, cultural and +scientific works ("Commons") that the public can reliably and without fear +of later claims of infringement build upon, modify, incorporate in other +works, reuse and redistribute as freely as possible in any form whatsoever +and for any purposes, including without limitation commercial purposes. +These owners may contribute to the Commons to promote the ideal of a free +culture and the further production of creative, cultural and scientific +works, or to gain reputation or greater distribution for their Work in +part through the use and efforts of others. 
+ +For these and/or other purposes and motivations, and without any +expectation of additional consideration or compensation, the person +associating CC0 with a Work (the "Affirmer"), to the extent that he or she +is an owner of Copyright and Related Rights in the Work, voluntarily +elects to apply CC0 to the Work and publicly distribute the Work under its +terms, with knowledge of his or her Copyright and Related Rights in the +Work and the meaning and intended legal effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not +limited to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, + communicate, and translate a Work; + ii. moral rights retained by the original author(s) and/or performer(s); +iii. publicity and privacy rights pertaining to a person's image or + likeness depicted in a Work; + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + v. rights protecting the extraction, dissemination, use and reuse of data + in a Work; + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation + thereof, including any amended or successor version of such + directive); and +vii. other similar, equivalent or corresponding rights throughout the + world based on applicable law or treaty, and any national + implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention +of, applicable law, Affirmer hereby overtly, fully, permanently, +irrevocably and unconditionally waives, abandons, and surrenders all of +Affirmer's Copyright and Related Rights and associated claims and causes +of action, whether now known or unknown (including existing as well as +future claims and causes of action), in the Work (i) in all territories +worldwide, (ii) for the maximum duration provided by applicable law or +treaty (including future time extensions), (iii) in any current or future +medium and for any number of copies, and (iv) for any purpose whatsoever, +including without limitation commercial, advertising or promotional +purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each +member of the public at large and to the detriment of Affirmer's heirs and +successors, fully intending that such Waiver shall not be subject to +revocation, rescission, cancellation, termination, or any other legal or +equitable action to disrupt the quiet enjoyment of the Work by the public +as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason +be judged legally invalid or ineffective under applicable law, then the +Waiver shall be preserved to the maximum extent permitted taking into +account Affirmer's express Statement of Purpose. 
In addition, to the +extent the Waiver is so judged Affirmer hereby grants to each affected +person a royalty-free, non transferable, non sublicensable, non exclusive, +irrevocable and unconditional license to exercise Affirmer's Copyright and +Related Rights in the Work (i) in all territories worldwide, (ii) for the +maximum duration provided by applicable law or treaty (including future +time extensions), (iii) in any current or future medium and for any number +of copies, and (iv) for any purpose whatsoever, including without +limitation commercial, advertising or promotional purposes (the +"License"). The License shall be deemed effective as of the date CC0 was +applied by Affirmer to the Work. Should any part of the License for any +reason be judged legally invalid or ineffective under applicable law, such +partial invalidity or ineffectiveness shall not invalidate the remainder +of the License, and in such case Affirmer hereby affirms that he or she +will not (i) exercise any of his or her remaining Copyright and Related +Rights in the Work or (ii) assert any associated claims and causes of +action with respect to the Work, in either case contrary to Affirmer's +express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + b. Affirmer offers the Work as-is and makes no representations or + warranties of any kind concerning the Work, express, implied, + statutory or otherwise, including without limitation warranties of + title, merchantability, fitness for a particular purpose, non + infringement, or the absence of latent or other defects, accuracy, or + the present or absence of errors, whether or not discoverable, all to + the greatest extent permissible under applicable law. + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without + limitation any person's Copyright and Related Rights in the Work. + Further, Affirmer disclaims responsibility for obtaining any necessary + consents, permissions or other rights required for any use of the + Work. + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to + this CC0 or use of the Work. diff --git a/README.Rmd b/README.Rmd deleted file mode 100644 index 0045058..0000000 --- a/README.Rmd +++ /dev/null @@ -1,180 +0,0 @@ ---- -output: github_document -bibliography: tds.bib ---- - - - -```{r setup, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -library(tidyverse) -``` - - -```{r, eval=FALSE, echo=FALSE} -refs1 = RefManageR::ReadZotero(group = "418217", .params = list(collection = "R38L2EXB", limit = 100)) -refs2 = RefManageR::ReadZotero(group = "418217", .params = list(collection = "JFR868KJ", limit = 100)) -RefManageR::WriteBib(c(refs1, refs2), "tds.bib") -``` - - -# TDS (Transport Data Science) - -This is a GitHub Repository (repo for short) that supports teaching of the Transport Data Science module at the University of Leeds. -The module can be taken by students on the [Data Science and Analytics](https://courses.leeds.ac.uk/i071/data-science-and-analytics-msc) and the [Transport Planning and the Environment](https://courses.leeds.ac.uk/a386/transport-planning-and-the-environment-msc) MSc courses. 
- - - - - - - -- On the University's system (official): [mytimetable.leeds.ac.uk](https://mytimetable.leeds.ac.uk/link?timetable.id=202324!module!D5179CB14D503D52757F4BE89B1C998B) -- In ical format (for import into Google/Outlook/other Calendar systems): [timetable.ics](timetable.ics) -- As a .csv file (for easy reading as data): https://github.com/ITSLeeds/TDS/blob/master/timetable.csv (see table below) - - - -```{r, message=FALSE, echo=FALSE} -timetable = read_csv("timetable.csv") -timetable %>% - mutate(`Duration (Hours)` = duration) %>% - select(-duration) %>% - rename_with(str_to_title) %>% - knitr::kable() -``` - - - - - -# Prerequisites - -## Software - -Although you are free to use any software for the course, the emphasis on reproducibility means that popular data science languages such as R and Python are *highly* recommended. - -The teaching will be delivered in R. -For this module you therefore need to have up-to-date versions of R and RStudio installed on a computer you have access to: - -- R from [cran.r-project.org](https://cran.r-project.org/) -- RStudio from [rstudio.com](https://rstudio.com/products/rstudio/download/#download) -- R packages, which can be installed by opening RStudio and typing `install.packages("stats19")` in the R console, for example. - -You should have the latest stable release of R (4.3.0 or above) and be comfortable setting-up any addition software tools you need for your work. -Should have access to a computer with decent resources (e.g. a laptop with 8 GB of more RAM). 
- -See [Section 1.5 of the online guide Reproducible Road Safety Research with R](https://itsleeds.github.io/rrsrr/introduction.html#installing-r-and-rstudio) for instructions on how to install key packages we will use in the module.^[ -For further guidance on setting-up your computer to run R and RStudio for spatial data, see these links, we recommend -Chapter 2 of Geocomputation with R (the Prerequisites section contains links for installing spatial software on Mac, Linux and Windows): https://geocompr.robinlovelace.net/spatial-class.html and Chapter 2 of the online book *Efficient R Programming*, particularly sections 2.3 and 2.5, for details on R installation and [set-up](https://csgillespie.github.io/efficientR/set-up.html) and the -[project management section](https://csgillespie.github.io/efficientR/set-up.html#project-management). -] - -It is also recommended that you have installed and have experience with GitHub Desktop (or command line git on Linux and Mac), Docker, Python, QGIS and transport modelling tools such as SUMO and A/B Street. -These software packages will help with the course but are not essential. - -## Data science experience - -Attending the Introduction to R one-off 3 hour workshop (semester 1 Computer Skills workshop) and experience of using R (e.g. having used it for work, in previous degrees or having completed an online course) is essential. -Students can demonstrate this by showing evidence that they have worked with R before, have completed an online course such as the first 4 sessions in the RStudio Primers series https://rstudio.cloud/learn/primers or DataCamp’s Free Introduction to R course: https://www.datacamp.com/courses/free-introduction-to-r. -This is an advanced and research-led module. -Evidence of substantial programming and data science experience in previous professional or academic work, in languages such as R or Python, also constitutes sufficient pre-requisite knowledge for the course. 
- -## Course reading - -See the [handbook](handbook-tds.md). - - - - - - - - -# Assessment (for those doing this as credit-bearing) - -- You will build-up a portfolio of work -- 100% coursework assessed, you will submit by **Friday 17th May**: - - **a pdf document up to 10 pages long with figures, tables, references explaining how you used data science to research a transport problem** - - **reproducible code contained in an RMarkdown (.Rmd) document that produced the report** -- Written in RMarkdown - will be graded for reproducibility -- Code chunks and figures are encouraged -- You will submit a non-assessed 2 page pdf + Rmd report by **Friday 23rd February** - -# Issues and contributing - -Any feedback or contributions to this repo are welcome. -If you have a question please open an issue here (you'll need a GitHub account): https://github.com/ITSLeeds/TDS/issues - - - - - - -```{r, eval=FALSE, echo=FALSE} -download.file("https://github.com/ITSLeeds/TDS/releases/download/0.1/data.zip", destfile = "data.zip") -unzip("data.zip") -``` - - - -```{r, eval=FALSE, echo=FALSE} -install.packages("piggyback") -piggyback::pb_download("data.zip") - -# (This package was used to upload the data with:) -# piggyback::pb_upload(file = "data.zip") -# piggyback::pb_upload(file = "codeExamples.zip") -``` - - - -```{r, echo=FALSE, eval=FALSE} -# aim: sort csv file -e = read.csv("sample-data/everyone.csv") -dplyr::arrange(.data = e, n_coffee) -``` - - - - - - - -```{r, eval=FALSE, echo=FALSE} -# Sorting in the tidyverse -library(tidyverse) -e = read_csv("sample-data/everyone.csv") -e %>% arrange(n_coffee) -e %>% arrange(desc(n_coffee)) -``` - - - - - - - - - - -```{r, echo=FALSE, eval=FALSE} -# first time -# git checkout --orphan gh-pages -# git rm -rf . -# -# # create a hidden file .nojekyll -# touch .nojekyll -# git add .nojekyll -# -# git commit -m"Initial commit" -# git push origin gh-pages - -# Then... 
-source deploy.sh -``` - -# References diff --git a/README.md b/README.md index 37a55b1..df9d477 100644 --- a/README.md +++ b/README.md @@ -1,150 +1,13 @@ - -# TDS (Transport Data Science) +This repo contains code for the Transport Data Science module at the +Institute for Transport Studies, University of Leeds. -This is a GitHub Repository (repo for short) that supports teaching of -the Transport Data Science module at the University of Leeds. The module -can be taken by students on the [Data Science and -Analytics](https://courses.leeds.ac.uk/i071/data-science-and-analytics-msc) -and the [Transport Planning and the -Environment](https://courses.leeds.ac.uk/a386/transport-planning-and-the-environment-msc) -MSc courses. +To set it up we used commands such as: - - - - - -- On the University’s system (official): - [mytimetable.leeds.ac.uk](https://mytimetable.leeds.ac.uk/link?timetable.id=202324!module!D5179CB14D503D52757F4BE89B1C998B) -- In ical format (for import into Google/Outlook/other Calendar - systems): [timetable.ics](timetable.ics) -- As a .csv file (for easy reading as data): - <https://github.com/ITSLeeds/TDS/blob/master/timetable.csv> (see table - below) - - - -| Summary                        | Description                             | Date       | Location                              | Duration (Hours) | -|:-------------------------------|:---------------------------------------|:-----------|:-------------------------------------|-----------------:| -| TDS deadline 1                 | Computer set-up                         | 2024-02-02 | Online - Teams                        |                0 | -| TDS Practical 1: intro         | Introduction to transport data science  | 2024-02-08 | Irene Manton North Cluster (7.96)     |                3 | -| TDS Practical 2: od            | Origin-destination data                 | 2024-02-15 | Irene Manton North Cluster (7.96)     |                3 | -| TDS Practical 3: routing       | Routing                                 | 2024-02-22 | Irene Manton North Cluster (7.96)     |                3 | -| TDS seminar 1                  | Seminar 1: Tom Van Vuren, Amey and ITS  | 2024-02-22 | Institute for Transport Studies 1.11  |                3 | -| TDS Practical 4: getting       | Getting transport data                  | 2024-02-29 | Irene Manton North Cluster (7.96)     |                3 | -| TDS seminar 2                  | Seminar 2 Will Deakin, 
Network Rail | 2024-03-21 | Institute for Transport Studies 1.11 | 3 | -| TDS deadline 2 | Draft portfolio | 2024-04-22 | Online - Teams | 0 | -| TDS Practical 5: visualisation | Visualising transport data | 2024-05-02 | Irene Manton North Cluster (7.96) | 3 | -| TDS Practical 6: project | Project work | 2024-05-09 | Irene Manton North Cluster (7.96) | 3 | -| TDS deadline 3 | Deadline: coursework, 2pm | 2024-05-17 | Online - Teams | 0 | - - - - -# Prerequisites - -## Software - -Although you are free to use any software for the course, the emphasis -on reproducibility means that popular data science languages such as R -and Python are *highly* recommended. - -The teaching will be delivered in R. For this module you therefore need -to have up-to-date versions of R and RStudio installed on a computer you -have access to: - -- R from [cran.r-project.org](https://cran.r-project.org/) -- RStudio from - [rstudio.com](https://rstudio.com/products/rstudio/download/#download) -- R packages, which can be installed by opening RStudio and typing - `install.packages("stats19")` in the R console, for example. - -You should have the latest stable release of R (4.3.0 or above) and be -comfortable setting-up any addition software tools you need for your -work. Should have access to a computer with decent resources (e.g. a -laptop with 8 GB of more RAM). - -See [Section 1.5 of the online guide Reproducible Road Safety Research -with -R](https://itsleeds.github.io/rrsrr/introduction.html#installing-r-and-rstudio) -for instructions on how to install key packages we will use in the -module.[^1] - -It is also recommended that you have installed and have experience with -GitHub Desktop (or command line git on Linux and Mac), Docker, Python, -QGIS and transport modelling tools such as SUMO and A/B Street. These -software packages will help with the course but are not essential. 
- -## Data science experience - -Attending the Introduction to R one-off 3 hour workshop (semester 1 -Computer Skills workshop) and experience of using R (e.g. having used it -for work, in previous degrees or having completed an online course) is -essential. Students can demonstrate this by showing evidence that they -have worked with R before, have completed an online course such as the -first 4 sessions in the RStudio Primers series - or DataCamp’s Free Introduction to -R course: . -This is an advanced and research-led module. Evidence of substantial -programming and data science experience in previous professional or -academic work, in languages such as R or Python, also constitutes -sufficient pre-requisite knowledge for the course. - -## Course reading - -See the [handbook](handbook-tds.md). - - - - - - -# Assessment (for those doing this as credit-bearing) - -- You will build-up a portfolio of work -- 100% coursework assessed, you will submit by **Friday 17th May**: - - **a pdf document up to 10 pages long with figures, tables, - references explaining how you used data science to research a - transport problem** - - **reproducible code contained in an RMarkdown (.Rmd) document that - produced the report** -- Written in RMarkdown - will be graded for reproducibility -- Code chunks and figures are encouraged -- You will submit a non-assessed 2 page pdf + Rmd report by **Friday - 23rd February** - -# Issues and contributing - -Any feedback or contributions to this repo are welcome. 
If you have a -question please open an issue here (you’ll need a GitHub account): - - - - - - - - - - - - - - - - - -# References - -[^1]: For further guidance on setting-up your computer to run R and - RStudio for spatial data, see these links, we recommend Chapter 2 of - Geocomputation with R (the Prerequisites section contains links for - installing spatial software on Mac, Linux and Windows): - and Chapter - 2 of the online book *Efficient R Programming*, particularly - sections 2.3 and 2.5, for details on R installation and - [set-up](https://csgillespie.github.io/efficientR/set-up.html) and - the [project management - section](https://csgillespie.github.io/efficientR/set-up.html#project-management). +``` r +usethis::use_description() +usethis::use_package("stats19") +usethis::use_package("DT") +usethis::use_package("quarto") +``` diff --git a/README.qmd b/README.qmd new file mode 100644 index 0000000..dd6b3b0 --- /dev/null +++ b/README.qmd @@ -0,0 +1,25 @@ +--- +format: gfm +--- + +This repo contains code for the Transport Data Science module at the Institute for Transport Studies, University of Leeds. 
+ +To set it up we used commands such as: + +```{r} +#| eval: false +usethis::use_description() +usethis::use_package("stats19") +usethis::use_package("DT") +usethis::use_package("quarto") +``` + +You can save presentations as PDF files with the following command: + +```{bash} +#| eval: false +#| echo: false +cd docs/slides +docker run --rm -t -v .:/slides astefanutti/decktape -s 1280x720 generic https://itsleeds.github.io/TDStests/slides/intro.html intro.pdf +firefox intro.pdf +``` \ No newline at end of file diff --git a/TDS.Rproj b/TDS.Rproj deleted file mode 100644 index 21a4da0..0000000 --- a/TDS.Rproj +++ /dev/null @@ -1,17 +0,0 @@ -Version: 1.0 - -RestoreWorkspace: Default -SaveWorkspace: Default -AlwaysSaveHistory: Default - -EnableCodeIndexing: Yes -UseSpacesForTab: Yes -NumSpacesForTab: 2 -Encoding: UTF-8 - -RnwWeave: Sweave -LaTeX: pdfLaTeX - -BuildType: Package -PackageUseDevtools: Yes -PackageInstallArgs: --no-multiarch --with-keep.source diff --git a/_quarto.yml b/_quarto.yml new file mode 100644 index 0000000..da03ed3 --- /dev/null +++ b/_quarto.yml @@ -0,0 +1,38 @@ +project: + type: website + output-dir: docs + +website: + title: "Transport Data Science" + sidebar: + background: primary + # logo: "images/logo.png" + pinned: true + align: center + tools: + - icon: github + href: https://github.com/itsleeds/TDStests + text: GitHub + style: docked + contents: + - href: index.qmd + text: Home + - href: schedule.qmd + text: Schedule + - href: materials.qmd + text: Materials + - href: https://github.com/ITSLeeds/TDS/discussions + text: Forum + - href: marking-criteria.qmd + text: Marking Criteria + +format: + html: + theme: cosmo + # css: styles.css + toc: true + number-sections: true + +execute: + freeze: auto + diff --git a/bristol-exercise.R b/bristol-exercise.R deleted file mode 100644 index e69de29..0000000 diff --git a/catalogue.Rmd b/catalogue.Rmd deleted file mode 100644 index a90d07e..0000000 --- a/catalogue.Rmd +++ /dev/null @@ -1,153 +0,0 @@ 
---- -bibliography: references.bib -output: github_document ---- - -# Catalogue - -This is the module catalogue for [TRAN5340M](http://webprod3.leeds.ac.uk/catalogue/dynmodules.asp?Y=201819&M=TRAN-5340M), Transport Data Science - -15 credits - -Module manager: Dr Robin [Lovelace](mailto:r.lovelace@leeds.ac.uk) - -Taught: Semester 2 - -Year running: 2018/19 - -[View Timetable](http://timetable.leeds.ac.uk/teaching/201819/reporting/Individual?objectclass=module&idtype=name&identifier=TRAN5340M01&&template=SWSCUST+module+Individual&days=1-7&weeks=1-52&periods=1-21) - -This module is not approved as an Elective - -## Module summary - -The quantity, diversity and availability of transport data is increasing rapidly, requiring skills in the management and interrogation of data and databases. -Recent years have seen a new wave of 'big data' and 'data science' changing the world, with the Harvard Business Review describing Data Science as the 'sexiest job of the 21st century' (see [hbr.org](https://hbr.org/2012/10/data-scientist-the-sexiest-job-of-the-21st-century)). -Transport researchers increasingly need to take data from a wide range of sources and perform non-standard analyses methods on them to inform the decision-making process. - -Despite these developments the transport sector has been slow to adapt to new methods and workflows. -The Transport Systems Catapult, for example, identified a skills gap in "skilled technical talent capable of handling and analysing very large datasets compiled from multiple sources" (see [ts.catapult.org.uk](https://ts.catapult.org.uk/news-events-gallery/news/report-warns-of-uk-skills-shortage-in-im-sector/)). - -This module takes a highly practical approach to learning about 'data science' tools and their application to investigating transport issues. -The focus is on practical data science, enabling attendees to make use of a wide range of datasets to answer real-world transport planning questions . 
- -## Objectives - -- Understand the structure of transport datasets: spatial, temporal and demographic. -- Understand how to obtain, clean and store transport related data. -- Gain proficiency in command-line tools for handling large transport datasets. -- Learn machine learning and data modelling techniques -- Produce data visualizations, static and interactive -- Learn where to find large transport datasets and assess data quality -- Learn how to join together the components of transport data science into a cohesive project portfolio - - - -## Learning outcomes - -Students will become confident at working in data science teams working on transport problems; in selecting appropriate tools to answer societal and business questions with a range of input data types; and understanding the wider implications of the increasing use of data science for transport planning. - -Specifically, learning outcomes will include the ability to: - -- Identify available datasets and access and clean them -- Combine datasets from multiple sources -- Understand what machine learning is, which problems it is appropriate for compared with traditional statistical approaches, and how to implement machine learning techniques -- Visualise and communicate the results of transport data science, and know about setting-up interactive web applications -- Deciding when to use local computing power vs cloud services -- Articulate the relevance and limitations of data-centric analysis applied to transport problems, compared with other methods - -## Syllabus - -- Software for practical data science -- The structure of transport data e.g. 
flows, incidents, origin/destination, GIS -- Data cleaning and subsetting -- Accessing data from web sources - -- Processing data using remote services and locally installed software -- Data visualization -- Machine learning -- Professional and ethical issues of big data in transport Transport data analysis - -## Subject specific skills - -Students will gain skills in: - -- Importing a range of transport data file formats from the command-line -- Setting-up data science projects to ensure reproducibility -- Data cleaning and manipulation -- Visualisation of large datasets - - -## Teaching methods - -

- - - - - -
-Delivery type | Number | Length hours | Student hours
-Lecture | 5 | 1.00 | 5.00
-Practical | 5 | 3.00 | 15.00
-Seminar | 5 | 1.00 | 5.00
-Private study hours | | | 125.00
-Total Contact hours | | | 25.00
-Total hours (100hr per 10 credits) | | | 150.00
- -## Private study - -Students are expected to spend their study time on software set-up and worked examples, plus background reading for lectures, preparatory work for workshops and assessed coursework. Unsupervised teamwork practical sessions will be arranged to ensure a complete portfolio. - -## Rationale for teaching and learning methods and relationship to learning outcomes - -There is a blend of lectures, seminars and practical sessions designed to fulfil the learning outcomes. - -A combination of lectures and seminars will be used to teach and gain understanding of the core skills and principles e.g. database design. This will be supported and augmented by independent learning. - -A key aim of the module is to gain hands-on experience designing, implementing and communicating data analysis workflows during supervised practical sessions, and consolidated in unsupervised practical sessions. - -## Monitoring of student progress - -Progress will be monitored informally during supervised practical sessions. - -Formative feedback will be given after interim informal assessment of their in-progress project portfolio (week 4/5) - -## Methods of assessment - -**Coursework** - -
-Assessment type | Notes | % of formal assessment
-Portfolio | Project Portfolio | 100.00
-Total percentage (Assessment Coursework) | | 100.00
- - - -### Other information about assessment - -The project portfolio will be used as the basis for formative assessment mid-way through the semester. -This will highlight if any students are struggling. -If a portfolio fails the assessment criteria the student will have an opportunity to resubmit a report outlining what they have learned in the areas in which they are failing. - - -## Pre-requisites - -You will be expected to have a laptop with recent versions of R and RStudio installed. -See instructions here for getting an up-to-date R installation: https://csgillespie.github.io/efficientR/set-up.html#installing-r - -It is also recommended that you install GitHub Desktop (or command line git on Linux and Mac), Docker, Python, QGIS and transport modelling tools such as SUMO but these are not essential. - - -## Reading list - - - -### Core - -- Introduction to data science with R (available free [online](http://r4ds.had.co.nz/)) [@grolemund_r_2016-1] -- Introductory and advanced content on geographic data in R, especially the [transport chapter](http://geocompr.robinlovelace.net/transport.html) (available free [online](http://geocompr.robinlovelace.net/)) [@lovelace_geocomputation_2018a] -- Introductory textbook introducing machine learning with lucid prose and worked examples in R (available free [online](http://www-bcf.usc.edu/~gareth/ISL/index.html)) [@james_introduction_2013] - -### Optional - -- Book on transport data science in Python [@fox_data_2018] -- Paper on analysing OSM data in Python (available [online](https://arxiv.org/pdf/1611.01890)) [@boeing_osmnx:_2017] -- Paper on the **stplanr** paper for transport planning (available [online](https://cran.r-project.org/web/packages/stplanr/vignettes/stplanr-paper.html)) [@lovelace_stplanr:_2017] -- For context, a report on the 'transport data revolution' [@transportsystemscatapult_transport_2015] -- Seminal text on visualisation (available 
[online](https://github.com/yowenter/books/blob/master/Design/Edward%20R%20Tufte%20-The%20Visual%20Display%20of%20Quantitative%20Information.pdf), style available in the [tufte](https://github.com/rstudio/tufte) R package) [@tufte_visual_2001] -- A paper on the use of SmartCard data [@gschwender_using_2016] -- An academic paper describing the development of a web application for the Department for Transport [@lovelace_propensity_2017] - -## Bibliography - diff --git a/code-python/2python/__pycache__/mymodule.cpython-35.pyc b/code-python/2python/__pycache__/mymodule.cpython-35.pyc deleted file mode 100644 index 8d90418..0000000 Binary files a/code-python/2python/__pycache__/mymodule.cpython-35.pyc and /dev/null differ diff --git a/code-python/2python/basicPython.py b/code-python/2python/basicPython.py deleted file mode 100644 index aa2e9a4..0000000 --- a/code-python/2python/basicPython.py +++ /dev/null @@ -1,141 +0,0 @@ -# Aim: demonstrate Python's syntax -# Author: Charles Fox (updates by Robin Lovelace) -# Required packages: numpy, os, pandas -# E.g. from TDS docker image: https://hub.docker.com/r/robinlovelace/tds/ - -print('hello world') - -print(2+2) - -x=4 -print(x+1) - -y=2.5 -y=2. 
- -s='hello'+' '+'world' -print(s) - -type(x) - -x=1 -print('There are '+str(x)+' cars') - -print('Camera %i saw car %s with confidence %f'%(5,'AB05 FDE',0.6)) - -l=[1,2,'car', 5.0, [5,6] ] - -print(l[4]) -l[4]=9 - -l[0:4] - -s = 'hello world' -s[1] -s[0:4] - -l.append('newstring') -l.remove(2) - -d=dict() -d['Andrew Froggatt']='XY32 AJF' -d['Andrew Bower']='XZ85 AJB' - -for i in range(0,10,4): - print('hello world'+str(i)) - - -x=1 -while x<10: - x=x+1 - print('while '+str(x)) - if x==1: - print('x is one!') - else: - print('x is not one!') - -import os -os.chdir("code-python") -f=open('myfilename.txt', 'w') -f.write('hello') -f.close() - -for line in open('myfilename.txt'): - print(line) - -def myfunction(a,b): - return a+b - -x=1 -y=2 -z=myfunction(x,y) -print(z) - - -import math -print( math.sin(math.pi * 2) ) -print( math.exp(2) ) - - -import numpy as np -Z = np.zeros((2,3)) -I = np.eye(3) -A = np.matrix([[1,2],[3,4]]) -A[0,1] = 6 -print(A[0:2, 1]) -print(A.shape) -print(A.dot(A)) #matrix multiplication -print(A+1) -#add scalar - - - -from pylab import * -xs = [1,2,3] -ys = [10, 12, 11] -plot(xs, ys, 'bx') #blue x's -hold(True) #add future plots to the same figure -plot(xs, ys, 'r-', linewidth=5) #thick red lines -text(xs[1],ys[1],'some text') -title('my graph') -ylabel('vehicle count') -gca().invert_yaxis() #flip the y axis -xticks( range(0,16), ['car', 'van', 'truck'], rotation='vertical') - - - -import pandas as pd -import numpy as np -df=pd.read_csv('/headless/data/accidents/Accidents_2015.csv') -df.columns -df.shape -df['Number_of_Vehicles'] -df[0:20] -df.iloc[7] -df.iloc[::10, :] -df.as_matrix() - -I = np.eye(3) -#create a matrix -df = pd.DataFrame(I, columns=['col1','col2','col3'])#convert np to pd -df['col1']+df['col2'] -#add columns -df + df -#add all columns -del df['col1'] -#delete a column -df.append(df) -#append frames -df.sort_values('col2', ascending=False) -#sort by a column -df['newField']=0. 
-#add extra column -df[df['col2']==0] -#select rows WHERE true -df.merge(df) -#JOIN columns from frames -df = df.merge(df, how='outer').fillna(method='ffill') #align by time -df.to_csv('filename') -#save frame as CSV -os.chdir("..") # return to working directory - diff --git a/code-python/2python/filename b/code-python/2python/filename deleted file mode 100644 index 190eaad..0000000 --- a/code-python/2python/filename +++ /dev/null @@ -1,4 +0,0 @@ -,col2,col3,newField -0,0.0,0.0,0.0 -1,1.0,0.0,0.0 -2,0.0,1.0,0.0 diff --git a/code-python/2python/myfilename.txt b/code-python/2python/myfilename.txt deleted file mode 100644 index b6fc4c6..0000000 --- a/code-python/2python/myfilename.txt +++ /dev/null @@ -1 +0,0 @@ -hello \ No newline at end of file diff --git a/code-python/2python/mymodule.py b/code-python/2python/mymodule.py deleted file mode 100644 index 726396d..0000000 --- a/code-python/2python/mymodule.py +++ /dev/null @@ -1,2 +0,0 @@ -def myfunction(a,b): - return a+b \ No newline at end of file diff --git a/code-python/2python/myprogram.py b/code-python/2python/myprogram.py deleted file mode 100644 index 2661d3a..0000000 --- a/code-python/2python/myprogram.py +++ /dev/null @@ -1,4 +0,0 @@ -import mymodule -z = mymodule.myfunction(1,2) - - diff --git a/code-python/2python/myprogram_alternative.py b/code-python/2python/myprogram_alternative.py deleted file mode 100644 index e691d86..0000000 --- a/code-python/2python/myprogram_alternative.py +++ /dev/null @@ -1,2 +0,0 @@ -from mymodule import * -z = myfunction(1,2) \ No newline at end of file diff --git a/code-python/2python/skillsCheckSolution.py b/code-python/2python/skillsCheckSolution.py deleted file mode 100644 index cfd2a00..0000000 --- a/code-python/2python/skillsCheckSolution.py +++ /dev/null @@ -1,44 +0,0 @@ -import sys,re,os -import operator - -fn_data = "/headless/data/accidents/Accidents_2015.csv" -d=dict() -b=True - - -d["1"] = 0 -d["2"] = 0 -d["3"] = 0 -d["4"] = 0 -d["5"] = 0 -d["6"] = 0 -d["7"] = 0 - 
-d_nv=dict() -for i in range(0, 100): - ns=str(i) - d_nv[ns] = 0 - -for line in open(fn_data): - if b: - b=False - continue - - line=line.strip() - fields=line.split(",") - - (Accident_Index, Location_Easting_OSGR, Location_Northing_OSGR, myLongitude, myLatitude, Police_Force, Accident_Severity, Number_of_Vehicles, Number_of_Casualties, TheDate, Day_of_Week, TheTime, Local_Authority_District,Local_Authority_Highway,Road1_Class,Road1_Number,Road_Type,Speed_limit,Junction_Detail,Junction_Control,Road2_Class,Road2_Number, Pedestrian_Crossing_Human_Control,Pedestrian_Crossing_Physical_Facilities, Light_Conditions, Weather_Conditions, Road_Surface_Conditions, Special_Conditions_at_Site, Carriageway_Hazards, Urban_or_Rural_Area, Did_Police_Officer_Attend_Scene_of_Accident, LSOA_of_Accident_Location) = fields - - - d[Day_of_Week] += 1 - - - d_nv[Number_of_Vehicles] += 1 - - -d_sorted = sorted(d.items(), key=operator.itemgetter(1)) - -d_nv_sorted = sorted(d_nv.items(), key=operator.itemgetter(1)) - -print(d_sorted) -print(d_nv_sorted) diff --git a/code-python/3databases/bluetoothCreate.py b/code-python/3databases/bluetoothCreate.py deleted file mode 100644 index 1f2b5bd..0000000 --- a/code-python/3databases/bluetoothCreate.py +++ /dev/null @@ -1,59 +0,0 @@ -import psycopg2 -con = psycopg2.connect(database='mydatabasename', user='root') -cur = con.cursor() - -sql = "DROP TABLE IF EXISTS BluetoothSite;" -cur.execute(sql) - -sql = "DROP TABLE IF EXISTS Detection;" -cur.execute(sql) - - -#NB we store timestamp as text here. Later we will use datetime tools to represeent it properly. -sql = "CREATE TABLE Detection ( id serial, siteID text, mac text, timestamp text );" -cur.execute(sql) - -#NB we store location as text here. Later we will use GIS tools to represent it properly. 
-sql = "CREATE TABLE BluetoothSite ( id serial PRIMARY KEY, siteID text, location text);" -cur.execute(sql) - - -con.commit() - -#fn for filename -fn_sites = "/headless/data/dcc/web_bluetooth_sites.csv" - -count=0 -for line in open(fn_sites): - line=line.strip() - count+=1 - if count<3: - continue #skip first lines - print(line) - (longID, siteID, locationDescription, location, direction)=line.split(",") - #print(siteID, location) - - sql = "INSERT INTO BluetoothSite (siteID, location) VALUES ('%s', '%s');"%(siteID, location) - # print(sql) - cur.execute(sql) - -con.commit() - - -siteID = "MAC000010100" -fn_detections = "/headless/data/dcc/bluetooth/vdFeb14_MAC000010100.csv" -count=0 -for line in open(fn_detections): - line=line.strip() - count+=1 - if count<2: - continue #skip first lines - #print(line) - (timestr, sensortype, direction, dummyA, dummyB, mac)=line.split(",") - print(siteID, timestr, mac) - - sql = "INSERT INTO Detection (siteID, timestamp, mac) VALUES ('%s', '%s', '%s');"%(siteID, timestr, mac) - print(sql) - cur.execute(sql) - -con.commit() diff --git a/code-python/3databases/pySQL.py b/code-python/3databases/pySQL.py deleted file mode 100644 index 54e804a..0000000 --- a/code-python/3databases/pySQL.py +++ /dev/null @@ -1,13 +0,0 @@ -import psycopg2 -con = psycopg2.connect(database='mydatabasename', user='root') -cur = con.cursor() - -sql = "SELECT * FROM Detection;" -cur.execute(sql) -mylist = cur.fetchall() -print(mylist) - - -sql = "INSERT INTO Detection (camera, licencePlate, confidence, timestamp) VALUES (9, 'A06 NPR', 0.78, '2014-04-28:10:18:21');" -cur.execute(sql) -con.commit() \ No newline at end of file diff --git a/code-python/4munging/exercises.py b/code-python/4munging/exercises.py deleted file mode 100644 index c4f81c3..0000000 --- a/code-python/4munging/exercises.py +++ /dev/null @@ -1,49 +0,0 @@ -import pandas as pd -import psycopg2 -con = psycopg2.connect(database='mydatabasename', user='root') -sql = 'SELECT * FROM Detection;' 
-dataFrame = pd.read_sql_query(sql,con) -print(dataFrame) - - -my_str="michael knight" -my_int=32 -my_float=1.92 -s = "name: %s, age: %d years, height: %f"%(my_str, my_int, my_float) -print(s) - -import datetime -#make datetime from string -dt = datetime.datetime.strptime('2017-02-11_13:00:35.067' , "%Y-%m-%d_%H:%M:%S.%f" ) -#convert datetime object to human readable string -dt.strftime("%Y-%m-%d_%H:%M:%S") -#create a TimeDelta object for difference between two datetimes -delta = dt-datetime.datetime.now() -print(delta) - -#or from scratch: -delta = datetime.timedelta(milliseconds=500) -#print delta as a human readable string -str(delta) -#convert delta to float number of seconds -delta.total_seconds() - -import re -print( re.match("(\d+) , (\d+)", "123 , 456").groups() ) - -from pyparsing import Word, alphas -greet = Word( alphas ) + "," + Word( alphas ) + "!" -greeting = greet.parseString( "Hello, World!" ) -print(greeting) - - - -from pyparsing import * -survey = 'GPS,PN1,LA52.125133215643,LN21.031048525561,EL116.898812' -number = Word(nums+'.').setParseAction(lambda t: float(t[0])) -separator = Suppress(',') -latitude = Suppress('LA') + number -longitude = Suppress('LN') + number -elevation = Suppress('EL') + number -line = (Suppress('GPS,PN1,')+latitude+separator+longitude+separator+elevation) -print(line.parseString(survey)) \ No newline at end of file diff --git a/code-python/4munging/importbluetooth.py b/code-python/4munging/importbluetooth.py deleted file mode 100644 index d93cdd1..0000000 --- a/code-python/4munging/importbluetooth.py +++ /dev/null @@ -1,58 +0,0 @@ -#this extends the code from ch3 to use pandas and datetimes - -import psycopg2 -import pandas as pd - - -con = psycopg2.connect(database='mydatabasename', user='root') -cur = con.cursor() - -sql = "DROP TABLE IF EXISTS BluetoothSite;" -cur.execute(sql) -sql = "DROP TABLE IF EXISTS Detection;" -cur.execute(sql) - - -#NB we store timestamp as text here. 
Later we will use datetime tools to represeent it properly. -sql = "CREATE TABLE Detection ( id serial, siteID text, mac text, timestamp timestamp );" -cur.execute(sql) -#NB we store location as text here. Later we will use GIS tools to represent it properly. -sql = "CREATE TABLE BluetoothSite ( id serial PRIMARY KEY, siteID text, location text);" -cur.execute(sql) - -con.commit() - -#see how pandas makes life easier than in chapter 3: - -fn_sites = "/headless/data/dcc/web_bluetooth_sites.csv" -df_sites = pd.read_csv(fn_sites, header=1) #dataframe. header is which row to use for the field names. -for i in range(0, df_sites.shape[0]): - sql = "INSERT INTO BluetoothSite (siteID, location) VALUES ('%s', '%s');"%(df_sites.iloc[i]['Site ID'], df_sites.iloc[i]['Grid']) - cur.execute(sql) -con.commit() - - -dir_detections = "/headless/data/dcc/bluetooth/" -import os -import re -import datetime -for fn in sorted(os.listdir(dir_detections)): #import ALL sensor files - print("processing file: "+fn) - - m = re.match("vdFeb14_(.+).csv", fn) #use regex to extract the sensor ID - if m is None: #if there was no regex match - continue #ignore any non detection files - - siteID = m.groups()[0] - fn_detections = dir_detections+fn - df_detections = pd.read_csv(fn_detections, header=0) #dataframe. header is which row to use for the field names. 
- - #here we use Python's DateTime library to store times properly - for i in range(0, df_detections.shape[0]): - datetime_text = df_detections.iloc[i]['Unnamed: 0'] - dt = datetime.datetime.strptime(datetime_text , "%d/%m/%Y %H:%M:%S" ) #proper Python datetime - sql = "INSERT INTO Detection (siteID, timestamp, mac) VALUES ('%s', '%s', '%s');"%(siteID, dt, df_detections.iloc[i]['Number Plate']) - cur.execute(sql) -con.commit() - - diff --git a/code-python/5spatial/appendix_ogr_roads.py b/code-python/5spatial/appendix_ogr_roads.py deleted file mode 100644 index 12c1291..0000000 --- a/code-python/5spatial/appendix_ogr_roads.py +++ /dev/null @@ -1,24 +0,0 @@ -import ogr #use the low level OGR library -from pylab import * - -#in this example we plot a map using OGR -ds = ogr.Open("/headless/data/dcc.osm.shp/lines.shp") #datasource -layer = ds.GetLayer(0) -nameList = [] -for feature in layer: - col="y" - #change the colour if its an interesting road type - highwayType = feature.GetField("highway") - if highwayType != None: - col="k" - if highwayType=="trunk": - col="g" - name = feature.GetField("name") - nameList.append(name) - #get the features set of point locations (a wiggly line) - geomRef=feature.GetGeometryRef() - x=[geomRef.GetX(i) for i in range(geomRef.GetPointCount())] - y=[geomRef.GetY(i) for i in range(geomRef.GetPointCount())] - plot(x,y, col) - - \ No newline at end of file diff --git a/code-python/5spatial/appendix_ogr_sensors.py b/code-python/5spatial/appendix_ogr_sensors.py deleted file mode 100644 index ed1ebbb..0000000 --- a/code-python/5spatial/appendix_ogr_sensors.py +++ /dev/null @@ -1,14 +0,0 @@ -import ogr #use the low level OGR library -from pylab import * - -#in this example we access fields and print them -ds = ogr.Open("/headless/data/dcc/examples/BluetoothUnits.shp") -layer = ds.GetLayer(0) #shapefiles may have multiple layers -ldefn = layer.GetLayerDefn() -#loop over each feature NAME in the layer DEFINITION -for n in 
range(ldefn.GetFieldCount()): - featurename = ldefn.GetFieldDefn(n).name - print(featurename) -for feature in layer: #loop over each object in the layer - location_description = feature.GetField("Location") - print(location_description) diff --git a/code-python/5spatial/derbyshire_map.py b/code-python/5spatial/derbyshire_map.py deleted file mode 100644 index 200977a..0000000 --- a/code-python/5spatial/derbyshire_map.py +++ /dev/null @@ -1,37 +0,0 @@ -import psycopg2 -import pandas as pd -import geopandas as gpd -import sys -from pylab import * - -con = psycopg2.connect(database='mydatabasename', user='root') -cur = con.cursor() - -sql = "DROP TABLE IF EXISTS Road;" -cur.execute(sql) -sql = "CREATE TABLE Road (name text, geom geometry, highway text);" -cur.execute(sql) - -#importing roads shapefile to database -#(this has come from openstreetmap, then ogr2ogr ) -fn_osm_shp = "/headless/data/dcc.osm.shp/lines.shp" -df_roads = gpd.GeoDataFrame.from_file(fn_osm_shp) -df_roads = df_roads.to_crs({'init': 'epsg:27700'}) -for index, row in df_roads.iterrows(): - sql="INSERT INTO Road VALUES ('%s', '%s', '%s');"%(row.name, row.geometry, row.highway ) - print(sql) - cur.execute(sql) -con.commit() -#road plotting -sql = "SELECT * FROM Road;" -df_roads = gpd.GeoDataFrame.from_postgis(sql,con,geom_col='geom') # -print(df_roads) -for index, row in df_roads.iterrows(): - (xs,ys) = row['geom'].coords.xy - color='y' - #road colour by type - if row['highway']=="motorway": - color = 'b' - if row['highway']=="trunk": - color = 'g' - plot(xs, ys, color) \ No newline at end of file diff --git a/code-python/5spatial/derbyshire_map_with_bluetooth.py b/code-python/5spatial/derbyshire_map_with_bluetooth.py deleted file mode 100644 index 867b18c..0000000 --- a/code-python/5spatial/derbyshire_map_with_bluetooth.py +++ /dev/null @@ -1,99 +0,0 @@ -#this extends the code from ch3 and ch4 to use geopandas - -import psycopg2 -import pandas as pd -import geopandas as gpd -from pylab import * - -con 
= psycopg2.connect(database='mydatabasename', user='root') -cur = con.cursor() - -def importRoads(): - #(data has come from openstreetmap, then ogr2ogr ) - print("importing roads...") - sql = "DROP TABLE IF EXISTS Road;" - cur.execute(sql) - sql = "CREATE TABLE Road (name text, geom geometry, highway text);" - cur.execute(sql) - fn_osm_shp = "/headless/data/dcc.osm.shp/lines.shp" - df_roads = gpd.GeoDataFrame.from_file(fn_osm_shp) - df_roads = df_roads.to_crs({'init': 'epsg:27700'}) - for index, row in df_roads.iterrows(): - sql="INSERT INTO Road VALUES ('%s', '%s', '%s');"%(row.name, row.geometry, row.highway ) -# print(sql) - cur.execute(sql) - con.commit() - -def importBluetoothSites(): - print("importing sites...") - sql = "DROP TABLE IF EXISTS BluetoothSite;" - cur.execute(sql) - sql = "CREATE TABLE BluetoothSite ( id serial PRIMARY KEY, siteID text, geom geometry);" - cur.execute(sql) - con.commit() - fn_sites = "/headless/data/dcc/web_bluetooth_sites.csv" - df_sites = pd.read_csv(fn_sites, header=1) #dataframe. header is which row to use for the field names. 
- for i in range(0, df_sites.shape[0]): - #munging to extract the coordinates - the arrive in National Grid - locationstr = str(df_sites.iloc[i]['Grid']) - bng_east = locationstr[0:6] - bng_north = locationstr[6:12] - sql = "INSERT INTO BluetoothSite (siteID, geom) VALUES ('%s', 'POINT(%s %s)');"%(df_sites.iloc[i]['Site ID'], bng_east, bng_north ) - cur.execute(sql) - con.commit() - -def importDetections(): - print("importing detections...") - sql = "DROP TABLE IF EXISTS Detection;" - cur.execute(sql) - sql = "CREATE TABLE Detection ( id serial, siteID text, mac text, timestamp timestamp );" - cur.execute(sql) - dir_detections = "/headless/data/dcc/bluetooth/" - import os - import re - import datetime - for fn in sorted(os.listdir(dir_detections)): #import ALL sensor files - print("processing file: "+fn) - m = re.match("vdFeb14_(.+).csv", fn) #use regex to extract the sensor ID - if m is None: #if there was no regex match - continue #ignore any non detection files - siteID = m.groups()[0] - fn_detections = dir_detections+fn - df_detections = pd.read_csv(fn_detections, header=0) #dataframe. header is which row to use for the field names. 
- #here we use Python's DateTime library to store times properly - for i in range(0, df_detections.shape[0]): - datetime_text = df_detections.iloc[i]['Unnamed: 0'] - dt = datetime.datetime.strptime(datetime_text , "%d/%m/%Y %H:%M:%S" ) #proper Python datetime - sql = "INSERT INTO Detection (siteID, timestamp, mac) VALUES ('%s', '%s', '%s');"%(siteID, dt, df_detections.iloc[i]['Number Plate']) - cur.execute(sql) - con.commit() - -def plotRoads(): - print("plotting roads...") - sql = "SELECT * FROM Road;" - df_roads = gpd.GeoDataFrame.from_postgis(sql,con,geom_col='geom') # - for index, row in df_roads.iterrows(): - (xs,ys) = row['geom'].coords.xy - color='y' - #road colour by type - if row['highway']=="motorway": - color = 'b' - if row['highway']=="trunk": - color = 'g' - #if not color=='y': #only plot major roads - plot(xs, ys, color) - -def plotBluetoothSites(): - sql = "SELECT siteID, geom FROM BluetoothSite;" - df_sites = gpd.GeoDataFrame.from_postgis(sql,con,geom_col='geom') # - for index, row in df_sites.iterrows(): - (xs,ys) = row['geom'].coords.xy - plot(xs, ys, 'bo') - - - -importRoads() -importBluetoothSites() -#importDetections() -plotRoads() -plotBluetoothSites() diff --git a/code-python/5spatial/geopands_postgis_links.py b/code-python/5spatial/geopands_postgis_links.py deleted file mode 100644 index 8273d5c..0000000 --- a/code-python/5spatial/geopands_postgis_links.py +++ /dev/null @@ -1,74 +0,0 @@ -#example read and write with geopandas and postGIS - -import psycopg2 -import sys -import psycopg2 -import pandas as pd -import geopandas as gpd - - -import pyproj -lat = 53.232350; lon = -1.422151 -projSrc = pyproj.Proj(proj="latlon", ellps="WGS84", datum="WGS84") -projDst = pyproj.Proj(proj="utm", utm_zone="30U", ellps="clrk66") -(east_m, north_m)=pyproj.transform(projSrc, projDst,lon,lat) #non-ISO! 
-print(east_m, north_m) - - - -con = psycopg2.connect(database='mydatabasename', user='root') -cur = con.cursor() - -sql = "DROP TABLE IF EXISTS BluetoothSite;" -cur.execute(sql) -sql = "DROP TABLE IF EXISTS Detection;" -cur.execute(sql) -sql = "DROP TABLE IF EXISTS Route;" -cur.execute(sql) -sql = "DROP TABLE IF EXISTS City;" -cur.execute(sql) - - -sql = "CREATE TABLE BluetoothSite (siteID text, geom geometry);" -cur.execute(sql) - -sql = "CREATE TABLE Route (name text, geom geometry);" -cur.execute(sql) - -sql = "CREATE TABLE City (name text, geom geometry);" -cur.execute(sql) - - -sql = "INSERT INTO BluetoothSite VALUES ('ID1003', 'POINT(0 -4)'), ('ID9984', 'POINT(1 1)');" -cur.execute(sql) -sql = "INSERT INTO Route VALUES ('route1', 'LINESTRING(0 0,-1 1)'), ('route2', 'LINESTRING(0 0, 1 1)');" -cur.execute(sql) -sql = "INSERT INTO City VALUES ('Chesterfield', 'POLYGON((0 0, 0 5, 5 5, 5 0, 0 0))');" -cur.execute(sql) - -con.commit() - -#using geopandas to retreive GIS data -sql = "SELECT siteID, geom FROM BluetoothSite;" -gdf=gpd.GeoDataFrame.from_postgis(sql,con,geom_col='geom' ) -print(gdf) - -#Note that if we convert GIS to non-GIS data *inside* the databsae, -#then we don't need geopandas, and just use pandas instead. -sql = "SELECT ST_X(geom), ST_Y(geom) FROM BluetoothSite;" -df = pd.read_sql_query(sql,con) -print(df) - - -sql = "SELECT * FROM BluetoothSite;" -df = gpd.GeoDataFrame.from_postgis(sql,con,geom_col='geom') -print(df['geom'][0].coords.xy) #get coordinates as numbers -for index, row in df.iterrows(): #loop over rows - print(row) - -#tell GeoPandas what coordinate system the numbers are in -df.crs = {'init': 'epsg:4326', 'no_defs' : True } - -#use GeoPandas to convert them to a different one. 
-df = df.to_crs(epsg=27700) - diff --git a/code-python/6pymc/accidents.py b/code-python/6pymc/accidents.py deleted file mode 100644 index 7e47081..0000000 --- a/code-python/6pymc/accidents.py +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/python3 - -#sudo pip3 install graphviz -#sudo pip3 install pydot - -# import necessary libraries -import pymc3 as pm -import numpy as np -import theano.tensor as t -import theano -from theano.printing import pydotprint -import matplotlib.pyplot as plt - -# PyMC3 notation -with pm.Model() as inferAccidents_Model: - - # generate our data - data=np.array([ 4, 5, 4, 0, 1, 4, 3, 4, 0, 6, 3, 3, 4, 0, 2, 6, 3, 3, 5, 4, 5, 3, 1, 4, - 4, 1, 5, 5, 3, 4, 2, 5, 2, 2, 3, 4, 2, 1, 3, 2, 2, 1, 1, 1, 1, 3, 0, 0, 1, - 0, 1, 1, 0, 0, 3, 1, 0, 3, 2, 2, 0, 1, 1, 1,0, 1, 0, 1, 0, 0, 0, 2, 1, 0, - 0, 0, 1, 1, 0, 3, 3, 1, 1, 2, 1, 1, 1, 1, 2, 4, 2, 0, 0, 1, 4, 0, 0, 0, 1, - 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1]) - - # no need to have "beta" in Exponential parameters - switchpoint = pm.DiscreteUniform('switchpoint', lower=0, upper=110) - early_mean = pm.Exponential('early_mean', 1.0) - late_mean = pm.Exponential('late_mean', 1.0) - - # define custom, deterministic distribution - # we need to do it with theano - # similar notation to PyMC2 can be used for simple distributions - @theano.compile.ops.as_op(itypes=[t.lscalar, t.dscalar,t.dscalar],otypes=[t.dvector]) - def rate(switchpoint, early_mean, late_mean): - out=np.empty(len(data)) - out[:switchpoint] = early_mean - out[switchpoint:] = late_mean - return out.flatten() - - # need to explicitly define inputs for "rate" to run - accidents = pm.Poisson('accidents', mu=rate(switchpoint, early_mean, late_mean), observed=data) - - # no support for dag in PyMC3 - # we do it with theano instead - # install pydotprint for python 3: ' pip3 install pydotprint ' - # install graphviz - # pydotprint(inferAccidents_Model.logpt) - - # define iteration start - start = pm.find_MAP() - - # MCMC in PyMC3 - step = pm.Metropolis() - 
trace=pm.sample(1e4, start=start, step = step, model=inferAccidents_Model) - -# show our amazing results -pm.traceplot(trace[0:]); -plt.show() diff --git a/code-python/6pymc/trafficLight.py b/code-python/6pymc/trafficLight.py deleted file mode 100644 index 16a552b..0000000 --- a/code-python/6pymc/trafficLight.py +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/python3 - -# import libraries -import pymc3 as pm -from scipy import stats -import matplotlib.pyplot as plt - -# this is PyMC3 notation -# essentially model initialisation in PyMC3 is done into the model fitting portion of the -# code and there is no model building as in PyMC2 - -with pm.Model() as trafficLight_model: - - # define our data - PyMC3 does not like map objects - data = stats.bernoulli(0.2).rvs(100000) - - # similar as in PyMC2 - theta = pm.Beta("theta", alpha=1.0 , beta=1.0) - # "observed" replaces "value" - color = pm.Bernoulli("color", p=theta, observed=data) - - # define iteration start - start = pm.find_MAP() - - # MCMC in PyMC3 - step = pm.Metropolis() - trace=pm.sample(1e4, start=start, step = step, model=trafficLight_model) - -# show our amazing results -pm.traceplot(trace[0:]); -plt.show() \ No newline at end of file diff --git a/code-python/7discrim/discrimExamples.py b/code-python/7discrim/discrimExamples.py deleted file mode 100644 index 9883be0..0000000 --- a/code-python/7discrim/discrimExamples.py +++ /dev/null @@ -1,96 +0,0 @@ -import numpy as np -from pylab import * - -def plot_cov_ellipse(cov, pos, volume=.5, ax=None, fc='none', ec=[0,0,0], a=1, lw=2): - from scipy.stats import chi2 - import matplotlib.pyplot as plt - from matplotlib.patches import Ellipse - def eigsorted(cov): - vals, vecs = np.linalg.eigh(cov) - order = vals.argsort()[::-1] - return vals[order], vecs[:,order] - if ax is None: - ax = plt.gca() - vals, vecs = eigsorted(cov) - theta = np.degrees(np.arctan2(*vecs[:,0][::-1])) - kwrg = {'facecolor':fc, 'edgecolor':ec, 'alpha':a, 'linewidth':lw} - # Width and height are 
"full" widths, not radius - width, height = 2 * np.sqrt(chi2.ppf(volume,2)) * np.sqrt(vals) - ellip = Ellipse(xy=pos, width=width, height=height, angle=theta, **kwrg) - ax.add_artist(ellip) - -def plot_line(gradient, intersect, xmin, xmax): - y0 = gradient*xmin + intersect - y1 = gradient*xmax + intersect - plot( [xmin,xmax], [y0, y1], 'k' ) - - -mu_p = np.array([270, 200]) -mu_d = np.array([300, 150]) - -sigma = 250 -Sigma = np.matrix([[sigma, 0], [0, sigma]]) - -xs_p = np.random.multivariate_normal(mu_p, Sigma, 100) -xs_d = np.random.multivariate_normal(mu_d, Sigma, 100) - - -clf() -plot( xs_p[:,0] , xs_p[:,1] , 'bx' ) -plot( xs_d[:,0] , xs_d[:,1] , 'ro' ) - -xlabel("NOx") -ylabel("CO2") - - -#draw covarience ellipses -plot_cov_ellipse(Sigma, mu_p) -plot_cov_ellipse(Sigma, mu_d) - -#TODO draw line separating (as fn of generatives?) - -T = 0.0 -w = np.dot ( np.linalg.inv(Sigma) , (mu_p - mu_d ) ) -c = np.dot(T - np.dot(mu_p, np.linalg.inv(Sigma)), mu_p) + np.dot(np.dot(mu_p, np.linalg.inv(Sigma)), mu_p) - - -c=-110 -g=1 -plot_line(g,c, 220, 360) - - -import sklearn.discriminant_analysis -np.hstack (( zeros(xs_p.shape[0]) , ones(xs_d.shape[0]) )) - -x = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1]]) -c = np.array([1, 1, 1, 2, 2]) -lda = sklearn.discriminant_analysis.LinearDiscriminantAnalysis() -lda.fit(x, c) -print(lda.predict([[-0.8, -1]])) - - - -xs = np.vstack((xs_p, xs_d)) -cs = np.hstack (( zeros(xs_p.shape[0]) , ones(xs_d.shape[0]) )) - -#fit a decision tree -from sklearn.tree import DecisionTreeClassifier, export_graphviz -dt = DecisionTreeClassifier(min_samples_split=20, random_state=99) -dt.fit(xs, cs) - -#these lines draw a picture of the tree -#import subprocess -#export_graphviz(dt, "foo.dot", ["f","g"]) -#subprocess.call("dot -Tpng foo.dot -o dt.png", shell=True) - -#fit a neural network -from sklearn.neural_network import MLPClassifier -clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(10, 10), random_state=1) -clf.fit(xs, 
cs) -cs_hat_nn = clf.predict(xs) -print("ground truth:") -print(cs) -print("neural net predictions:") -print(cs_hat_nn) -#NB in real life you should use a separate test set for this to avoid overfitting. - diff --git a/code-python/8spatial/GP_oneD.py b/code-python/8spatial/GP_oneD.py deleted file mode 100644 index bdf0301..0000000 --- a/code-python/8spatial/GP_oneD.py +++ /dev/null @@ -1,9 +0,0 @@ -import GPy, numpy as np -N = 5 -X = np.random.uniform(-3.,3.,(N,1)) -Y = np.sin(X) + np.random.randn(N,1)*0.05 -kernel = GPy.kern.RBF(input_dim=1, variance=1., lengthscale=1.) -m= GPy.models.GPRegression(X,Y,kernel) -m.optimize() -m.plot() - diff --git a/code-python/8spatial/GP_twoD.py b/code-python/8spatial/GP_twoD.py deleted file mode 100644 index e5832e9..0000000 --- a/code-python/8spatial/GP_twoD.py +++ /dev/null @@ -1,15 +0,0 @@ -import GPy, numpy as np -#generate some data -X = np.random.uniform(-3.,3.,(50,2)) -Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(50,1)*0.05 -#define, fit and plot 2D Gaussian Process -ker = GPy.kern.Matern52(2,ARD=True) + GPy.kern.White(2) -m = GPy.models.GPRegression(X,Y,ker) -m.optimize(messages=True,max_f_eval = 1000) -m.plot() -#plot uncertinties of three 1D slices through the 2D surface -slices = [-1, 0, 1.5] -figure = GPy.plotting.plotting_library().figure(3, 1) -for i, y in zip(range(3), slices): - m.plot(figure=figure, fixed_inputs=[(1,y)], row=(i+1), plot_data=False) - diff --git a/code-python/8spatial/routing.py b/code-python/8spatial/routing.py deleted file mode 100644 index c1f01a8..0000000 --- a/code-python/8spatial/routing.py +++ /dev/null @@ -1,75 +0,0 @@ -import geopandas as gpd -import psycopg2 -import pyproj -from pylab import * -import pyproj - - -#before using this you need to make a link-breaked version of the roads in yoru database -#this is done with the osm2pgrourting tool. 
-#before use it requires a security setup: (for security reasons it wqill only run on a database which has a password setup): - -#$ sudo -u postgres psql -# \password -# (enter 'postgres' for the password twice) -# Ctrl-D (to exit psql) - -#Then run the link-breaking with: -#$ osm2pgrouting -f data/dcc.osm -d mydatabasename -W postgres - - -#this is NOT the same as importing osm via shapefiles yourself into a Road table -#as it splits links up into smaller units to make a routable network. - -#you also need to do -# $ psql -d mydatabasename -# CREATE EXTENSION pgrouting -#in psql, to enable postgres routing extension. - - -wgs84 = pyproj.Proj(init='epsg:4326') #WGS84 -bng = pyproj.Proj(init='epsg:27700') #british national grid - -#set up plotting -clf() -hold(True) - -#set up database connection -con = psycopg2.connect(database='mydatabasename', user='root') -cur = con.cursor() - - - -#routing - gives a list of edge gids (not osm_ids, which may be split up into smaller gids by osm2pgr) -def doRouting(): - sql = "SELECT * FROM pgr_dijkstra('SELECT gid AS id,source,target,length AS cost FROM ways', 170,750, directed := false), ways WHERE ways.gid=pgr_dijkstra.edge;" - df_route = gpd.GeoDataFrame.from_postgis(sql,con,geom_col='the_geom') - b_route=dict() - routeEdgesOsmIDs = df_route['edge'] - for i in routeEdgesOsmIDs: - b_route[i]=1 - return b_route - - -def makeMap(route): - sql = "SELECT * FROM ways;" #NB using osm2pgrouting link-breaked tables, not original OSM roads - df_roads = gpd.GeoDataFrame.from_postgis(sql,con,geom_col='the_geom') - for i in range(0,df_roads.shape[0]): - road = df_roads.iloc[i] - lons = road['the_geom'].coords.xy[0] #coordinates in latlon - lats = road['the_geom'].coords.xy[1] - gid = int(road.gid) - xs=[] - ys=[] - n_segments = len(lons) - for j in range(0, n_segments): - (x,y) = pyproj.transform(wgs84, bng, lons[j], lats[j]) #project to BNG -- uses nonISO lonlat convention - xs.append(x) - ys.append(y) - color='y' - if gid in route: 
#color the route in red, other roads yellow - color='r' - plot(xs, ys, color) - -route = doRouting() -makeMap(route) diff --git a/code-python/9vis/appendix_leaflet_mapnik/drawLeaflet.py b/code-python/9vis/appendix_leaflet_mapnik/drawLeaflet.py deleted file mode 100644 index e5446e4..0000000 --- a/code-python/9vis/appendix_leaflet_mapnik/drawLeaflet.py +++ /dev/null @@ -1,36 +0,0 @@ -import ogr - -import subprocess - -def createShapeFile(): - driver = ogr.GetDriverByName('ESRI Shapefile') - datasource = driver.CreateDataSource('towncenter.shp') - layer = datasource.CreateLayer('layerName',geom_type=ogr.wkbPolygon) - lonmin = -1.4366 - latmin = 53.2242 - lonmax = -1.4102 - latmax = 53.2396 - myRing = ogr.Geometry(type=ogr.wkbLinearRing) - myRing.AddPoint(lonmin, latmin)#LowerLeft - myRing.AddPoint(lonmin, latmax)#UpperLeft - myRing.AddPoint(lonmax, latmax)#UpperRight - myRing.AddPoint(lonmax, latmin)#Lower Right - myRing.AddPoint(lonmin,latmin)#close ring - myPoly = ogr.Geometry(type=ogr.wkbPolygon) - myPoly.AddGeometry(myRing) - feature = ogr.Feature( layer.GetLayerDefn() ) - feature.SetGeometry(myPoly) - layer.CreateFeature(feature) - feature.Destroy() #flush memory - datasource.Destroy() - -createShapeFile() - - -#convert shapefile to JSON -subprocess.call("ogr2ogr -f GeoJSON -s_srs wgs84 -t_srs wgs84 towncenter.json.tmp towncenter.shp", shell=True) - -#add head and tail text to JSON to make it work in Leaflet -subprocess.call(' echo "var towncenter =" > head && echo ";" > tail && cat head towncenter.json.tmp tail > towncenter.json; rm head && rm tail', shell=True) - -subprocess.call("firefox webpage.html", shell=True) diff --git a/code-python/9vis/appendix_leaflet_mapnik/mapnik_example.py b/code-python/9vis/appendix_leaflet_mapnik/mapnik_example.py deleted file mode 100644 index e8e15c9..0000000 --- a/code-python/9vis/appendix_leaflet_mapnik/mapnik_example.py +++ /dev/null @@ -1,65 +0,0 @@ -#mapnik is not installed in itslive as its authors are updating it at the 
time of writing. -#to install the latest development version yourself for future versions, follow the instructions from, -# https://github.com/mapnik/mapnik/wiki/UbuntuInstallation - -import ogr -import mapnik - -def createShapeFile(): - driver = ogr.GetDriverByName('ESRI Shapefile') - datasource = driver.CreateDataSource('test.shp') - layer = datasource.CreateLayer('layerName',geom_type=ogr.wkbPolygon) - lonmin = -1.4366 - latmin = 53.2242 - lonmax = -1.4102 - latmax = 53.2396 - myRing = ogr.Geometry(type=ogr.wkbLinearRing) - myRing.AddPoint(lonmin, latmin)#LowerLeft - myRing.AddPoint(lonmin, latmax)#UpperLeft - myRing.AddPoint(lonmax, latmax)#UpperRight - myRing.AddPoint(lonmax, latmin)#Lower Right - myRing.AddPoint(lonmin,latmin)#close ring - myPoly = ogr.Geometry(type=ogr.wkbPolygon) - myPoly.AddGeometry(myRing) - feature = ogr.Feature( layer.GetLayerDefn() ) - feature.SetGeometry(myPoly) - layer.CreateFeature(feature) - feature.Destroy() #flush memory - datasource.Destroy() - -def makeStyleMyShape(): - s = mapnik.Style() - rule_poly = mapnik.Rule() # rule object to hold symbolizers - polygon_symbolizer = mapnik.PolygonSymbolizer() - polygon_symbolizer.fill = mapnik.Color('#ffff00') - rule_poly.symbols.append(polygon_symbolizer) # add the symbolizer to the rule object - line_symbolizer = mapnik.LineSymbolizer() - line_symbolizer.stroke = mapnik.Color('rgb(50%,50%,50%)') - line_symbolizer.stroke_width = 0.1 - rule_poly.symbols.append(line_symbolizer) # add the symbolizer to the rule object - s.rules.append(rule_poly) # now add the rule to the style and we're done - return s - -def renderMap(): - m = mapnik.Map(600,600) - mapnik.load_map(m, 'mini_osm.xml') - s_my = makeStyleMyShape() - m.append_style('mystyle',s_my) - - layer = mapnik.Layer("osm") - ds = mapnik.Osm(file="/home/charles/data/osm/dcc.osm") - layer.datasource = ds - layer.styles.append('cf') - m.layers.append(layer) - - layer2 = mapnik.Layer("myshape") - ds = mapnik.Shapefile(file="test.shp") - 
layer2.datasource = ds - layer2.styles.append('mystyle') - m.layers.append(layer2) - - m.zoom_all() - mapnik.render_to_file(m, 'dcc.png', 'png') - -createShapeFile() -renderMap() diff --git a/code-python/README.md b/code-python/README.md deleted file mode 100644 index 0401722..0000000 --- a/code-python/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# Code written by Charles fox - -This code supports [Data Science for Transport](https://www.amazon.co.uk/Data-Science-Transport-Self-Study-Environment/dp/3319729527) and runs on the docker image https://hub.docker.com/r/itsleeds/itsleeds/ diff --git a/code-python/myfilename.txt b/code-python/myfilename.txt deleted file mode 100644 index b6fc4c6..0000000 --- a/code-python/myfilename.txt +++ /dev/null @@ -1 +0,0 @@ -hello \ No newline at end of file diff --git a/code-python/osmnx-demo.py b/code-python/osmnx-demo.py deleted file mode 100644 index b81272d..0000000 --- a/code-python/osmnx-demo.py +++ /dev/null @@ -1,15 +0,0 @@ -# Aim: demonstrat osmnx in action - -place = "Dangkao" - -import networkx as nx -import osmnx as ox -import requests -import matplotlib.cm as cm -import matplotlib.colors as colors -# after fixing a couple of issues, e.g. with https://github.com/gboeing/osmnx/issues/45 and: -# Error in py_call_impl(callable, dots$args, dots$keywords) : -# ImportError: Something is wrong with the numpy installation. While importing we detected an older version of numpy in ['/home/robin/.local/lib/python3.6/site-packages/numpy']. One method of fixing this is to repeatedly uninstall numpy until none is found, then reinstall this version. -# ... I can do this: -G = ox.graph_from_place(place, network_type='drive') -ox.plot_graph(ox.project_graph(G)) # note this was sloooow! 
diff --git a/code-python/read-sort-csv.py b/code-python/read-sort-csv.py deleted file mode 100644 index ea5433d..0000000 --- a/code-python/read-sort-csv.py +++ /dev/null @@ -1,3 +0,0 @@ -import pandas as pd -e = pd.read_csv("/mnt/27bfad9a-3474-4e61-9a43-0156ebc67d67/home/robin/ITSLeeds/TDS/sample-data/everyone.csv") -pd.DataFrame.sort_values(self=e, by="n_coffee") diff --git a/code-python/run-python.sh b/code-python/run-python.sh deleted file mode 100644 index 6e14603..0000000 --- a/code-python/run-python.sh +++ /dev/null @@ -1,29 +0,0 @@ -# aim: run python in docker - -docker run -d -p 8787:8787 -v $(pwd):/home/rstudio/data robinlovelace/tds -docker ps - -# navigate to http://localhost:8787 -# log-in with username and password rstudio -# then press Alt+Shift+T to go into the shell and enter the following -# can also be run from .sh script from the command line: -conda activate base -python - -# now in python shell -import matplotlib as mpl -mpl.use('Agg') -import osmnx as ox -ox.config(use_cache=True) -G = ox.graph_from_place('Piedmont, CA, USA', network_type='drive') -# ox.plot_graph(ox.project_graph(G)) -fig, ax = ox.plot_graph(ox.project_graph(G), filename = "plot", show=False, save=True, close=True) - -ox.save_load.save_graph_shapefile(G, filename="graph", folder=".") -# ox.plot_graph(ox.project_graph(G), filename = "plot.png", show = False, save = True, close = True) # crashes - -# then from R console: -list.files() # check graph.shp is there -library(sf) -G = st_read("graph/edges/edges.shp") -plot(G) diff --git a/code-r/01-person-data.R b/code-r/01-person-data.R deleted file mode 100644 index 8519fce..0000000 --- a/code-r/01-person-data.R +++ /dev/null @@ -1,75 +0,0 @@ -# group 1 -person_name = c( - "robin", - "malcolm", - "richard" -) -n_coffee = c( - 5, - 1, - 0 -) -like_bus_travel = c( - TRUE, - FALSE, - TRUE -) -personal_data1 = data.frame( - person_name, - n_coffee, - like_bus_travel - ) - -#Group 2 -person_name = c( - "Zi", - "Ignacio") -n_coffee = c( 
- 4, - 0) -like_bus_travel = c( - FALSE, - TRUE) -personal_data2 = data.frame( - person_name, - n_coffee, - like_bus_travel -) - -#Group 3 -person_name = c("Caroline", "Tatjana") -n_coffee = c(6, 8) -like_bus_travel = c(FALSE, FALSE) - -personal_data3 = data.frame(person_name, - n_coffee, - like_bus_travel) - - -# Group 4 -person_name = c( - "Hawah", - "Colin", - "Eugeni") - -n_coffee = c( - 2,0,7 -) - -like_bus_travel= c(FALSE, TRUE, FALSE) -person_data4 = data.frame( - person_name, - n_coffee, - like_bus_travel -) - -everyone = rbind( - personal_data1, - personal_data2, - personal_data3, - person_data4 - ) - -mean(everyone$n_coffee) - -readr::write_csv(everyone, "sample-data/everyone.csv") diff --git a/code-r/get-cycle-parking.R b/code-r/get-cycle-parking.R deleted file mode 100644 index f76d5aa..0000000 --- a/code-r/get-cycle-parking.R +++ /dev/null @@ -1,8 +0,0 @@ -# aim: get cycle parking data - -library(geofabrik) -library(dplyr) -osm_points = get_geofabrik(name = "West Yorkshire", layer = "points") -cycle_parking = osm_points %>% filter(amenity == "bicycle_parking") -cycle_parking -mapview::mapview(cycle_parking) diff --git a/code-r/getting-data-gov-uk.R b/code-r/getting-data-gov-uk.R deleted file mode 100644 index b84271d..0000000 --- a/code-r/getting-data-gov-uk.R +++ /dev/null @@ -1,45 +0,0 @@ -# Aim: demonstrate how to get data from UK gov sources - -library(tidyverse) - -# First stage: search - I searched on Google for: -# https://www.google.com/search?q=recycling+points+leeds+geojson - -# That took me here: -# https://datamillnorth.org/publisher/leedscitycouncil?format=geojsonhttps://datamillnorth.org/publisher/leedscitycouncil?format=geojson - - -# That linked me to: -# https://datamillnorth.org/dataset/bring-sites - -u = "https://datamillnorth.org/download/bring-sites/97bd60ae-ced3-4ddc-996e-f1ebe5d21136/tonnage%20to%20Sept%2020.csv" -waste_sites = read_csv(u) -waste_sites -dim(waste_sites) -skimr::skim(waste_sites) -waste_sites_unique = waste_sites 
%>% - # remove missing values: data cleaning - filter(! is.na(Longitude)) %>% - group_by(`Site Name`, Longitude, Latitude) %>% - summarise(tonnes_glass = sum(`Apr 20 Glass Tonnage Kg`, `May 20 Glass Tonnage Kg`)) - -waste_sites_sf = sf::st_as_sf(waste_sites_unique, coords = c("Longitude", "Latitude"), crs = 4326) - -mapview::mapview(waste_sites_sf) - -# remove sites that are miles away -pct_regions = pct::pct_regions -waste_sites_clean = waste_sites_sf[pct_regions, ] - -library(tmap) -tm_shape(waste_sites_clean) + - tm_dots(size = "tonnes_glass") - - -# Tests - -# u = "https://mapservices.leeds.gov.uk/arcgis/rest/services/Public/Waste/MapServer/2?f=pjson" -# d_json = jsonlite::read_json(u) -# d_json$name -# d_json$fields -# d = jsonlite::flatten(d_json) diff --git a/code-r/l7.R b/code-r/l7.R deleted file mode 100644 index f7755de..0000000 --- a/code-r/l7.R +++ /dev/null @@ -1,33 +0,0 @@ -library(spData) -nz2 = nz -plot(nz) -nz - -plot(st_geometry(nz)) -plot(nz_height, add = TRUE) - -library(tmap) -tmap_mode("plot") -names(nz) -tm_shape(nz) + - tm_fill("Population", palette = "viridis") + - tm_borders(col = "black") - -tmap_mode("view") -tmap_mode("plot") -m = tm_shape(nz) + - tm_fill("Population") + - tm_borders(col = "grey") - -tmap_save(m, "m.png") - -class(m) -tmap_save(m, "m.html") - -vignette("ggplot2-specs") - -mapview::mapview(nz) - -library(stplanr) -l = flowlines_sf -plot(l) diff --git a/code-r/otp-demo.R b/code-r/otp-demo.R deleted file mode 100644 index 1675871..0000000 --- a/code-r/otp-demo.R +++ /dev/null @@ -1,75 +0,0 @@ -# Aim: set-up and use local routing service - -# Settings: Things to change to work on your PC -path_data = file.path(tempdir(), "otp") - - -# Check Java: Linux Only -# install java with sudo apt install openjdk-8-jdk on Ubuntu -# system("which javac") -# system("java -version") -# system("update-java-alternatives --list") -# system("sudo update-java-alternatives --set /usr/lib/jvm/java-1.8.0-openjdk-amd64") - -# Setup: 
-devtools::install_github("ropensci/opentripplanner", build_vignettes = TRUE) -devtools::install_github("itsleeds/geofabrik") - -library(geofabrik) -library(opentripplanner) -library(tmap) -tmap_mode("view") - - -# test with a remote server ----------------------------------------------- - -# note: this will only work if the remote instance is working (not reliable) -otpcon = otp_connect(hostname = "86.6.99.6", port = 8080) # To connect to robin's instance - -route_walk = otp_plan(otpcon, - fromPlace = c(-1.63078, 53.84675), - toPlace = c(-1.59499, 53.81743), mode = "WALK") -plot(route_walk) - - -# set-up a local otp server instance -------------------------------------- - -# Make file structure, download files -dir.create(path_data) -dir.create(file.path(path_data, "graphs")) -dir.create(file.path(path_data, "graphs", "default")) -path_otp = otp_dl_jar(path_data) -wy = geofabrik::gf_find("West Yorkshire") -download.file(wy$pbf_url, file.path(path_data,"graphs","default","wy.pbf")) -gtfs_url = "https://github.com/ITSLeeds/TDS/releases/download/0.20.1/wy_rail8.zip" -dem_url = "https://github.com/ITSLeeds/TDS/releases/download/0.20.1/dem.tif" - -add_data = function(url, path_data, router = "default", filename = "gtfs.zip") { - download.file(url, file.path(path_data, "graphs", router, filename)) -} - -add_data(gtfs_url, path_data, filename = "gtfs.zip") -add_data(dem_url, path_data, filename = "dem.tif") -#otp_dl_demo(path_data) - -log1 = otp_build_graph(otp = path_otp, dir = path_data) -log2 = otp_setup(otp = path_otp, dir = path_data) - -otpcon = otp_connect() # if OpenTripPlanner is running locally - -route_walk = otp_plan(otpcon, - fromPlace = c(-1.63078, 53.84675), - toPlace = c(-1.59499, 53.81743), mode = "WALK") - -route_walk = otp_plan(otpcon, - fromPlace = c(-1.63078, 53.84675), - toPlace = c(-1.59499, 53.81743), mode = "WALK") - -route_transit = otp_plan(otpcon, - fromPlace = c(-1.63078, 53.84675), - toPlace = c(-1.59499, 53.81743), mode = 
c("WALK","TRANSIT")) - - -qtm(sf::st_zm(route_transit), lines.col = "mode", lines.lwd = 3) - -otp_stop() diff --git a/code-r/p1.R b/code-r/p1.R deleted file mode 100644 index 3504adc..0000000 --- a/code-r/p1.R +++ /dev/null @@ -1,38 +0,0 @@ -library(sf) -library(tmap) -library(dplyr) - -region = "west-yorkshire" - -u = paste0( - "https://github.com/npct/pct-outputs-regional-notR/raw/master/commute/msoa/", - region, - "/l.geojson" - ) - -desire_lines = read_sf(u) -desire_lines_1000 = desire_lines %>% - top_n(1000, all) -plot(desire_lines_1000$geometry) - -car_dependent_routes = desire_lines_1000 %>% - mutate(percent_drive = car_driver / all * 100) %>% - filter(rf_dist_km < 3 & rf_dist_km > 1) - -b = c(0, 25, 50, 75) -tm_shape(car_dependent_routes) + - tm_lines(col = "percent_drive", lwd = "all", scale = 5, breaks = b, palette = "-inferno") - - - -# roads in Leeds -library(osmdata) -# roads_leeds = opq("leeds") %>% -# add_osm_feature("highway") %>% -# osmdata_sf() -# cycleway_leeds = opq("leeds") %>% -# add_osm_feature("highway", "cycleway") %>% -# osmdata_sf() -library(sf) -cw = cycleway_leeds$osm_lines -plot(cw) \ No newline at end of file diff --git a/code-r/p3-2020.R b/code-r/p3-2020.R deleted file mode 100644 index 4a6c092..0000000 --- a/code-r/p3-2020.R +++ /dev/null @@ -1,28 +0,0 @@ -# aim: demo around practical 1 -library(tidyverse) - -d_by_bus = d %>% filter(like_bus_travel) -mean(d_by_bus$n_coffee) - -b_not_by_bus = d %>% filter(!like_bus_travel) -mean(b_not_by_bus$n_coffee) - -d %>% - group_by(like_bus_travel) %>% - summarise(mean_coffee = mean(n_coffee)) - -# tapply... 
- -zones = pct::get_pct_zones(region = "west-yorkshire") -sf::st_crs(zones) = 4326 -index_max_bicycle = which.max(zones$bicycle) -max(zones$bicycle) -zones$bicycle[index_max_bicycle] - -zone_max_bicycle = zones %>% filter(bicycle == max(bicycle)) -zone_max_bicycle_10 = zones %>% top_n(n = 20, wt = bicycle) -mapview::mapview(zone_max_bicycle_10) -zone_max_bicycle = zones[which.max(zones$bicycle), ] -plot(zone_max_bicycle) -sf::st_crs(zone_max_bicycle) = 4326 -mapview::mapview(zone_max_bicycle) diff --git a/code-r/p3.R b/code-r/p3.R deleted file mode 100644 index c9ed469..0000000 --- a/code-r/p3.R +++ /dev/null @@ -1,18 +0,0 @@ -# Practical 3 homework - -devtools::install_github("Nowosad/spDataLarge") -library(spDataLarge) -library(dplyr) -library(sf) - -names(bristol_zones) -plot(bristol_zones) - -zones_attr = bristol_od %>% - group_by(o) %>% - summarize_if(is.numeric, sum) %>% - dplyr::rename(geo_code = o) -zones_joined = left_join(bristol_zones, zones_attr, by = "geo_code") -sum(zones_joined$all) -#> [1] 238805 -names(zones_joined) diff --git a/code-r/p4-lecture-code.R b/code-r/p4-lecture-code.R deleted file mode 100644 index 4636b9e..0000000 --- a/code-r/p4-lecture-code.R +++ /dev/null @@ -1,138 +0,0 @@ -d_read.csv = read.csv("wu03ew_v2.csv") -nrow(d) -# [1] 2402201 -nrow(d_read.csv) -# [1] 2402201 -class(d) -# [1] "spec_tbl_df" "tbl_df" "tbl" "data.frame" -class(d_read.csv) -read_csv -object.size(d) / 1e6 -# 270 bytes -names(d) -names_old = names(d) -names_new = snakecase::to_snake_case(names_old) -names_new -# [1] "area_of_residence" -# [2] "area_of_workplace" -# [3] "all_categories_method_of_travel_to_work" -# [4] "work_mainly_at_or_from_home" -# [5] "underground_metro_light_rail_tram" -# [6] "train" -# [7] "bus_minibus_or_coach" -# [8] "taxi" -# [9] "motorcycle_scooter_or_moped" -# [10] "driving_a_car_or_van" -# [11] "passenger_in_a_car_or_van" -# [12] "bicycle" -# [13] "on_foot" -# [14] "other_method_of_travel_to_work" -names_old[13] = "onFoot" -names_old -# 
[1] "Area of residence" -# [2] "Area of workplace" -# [3] "All categories: Method of travel to work" -# [4] "Work mainly at or from home" -# [5] "Underground, metro, light rail, tram" -# [6] "Train" -# [7] "Bus, minibus or coach" -# [8] "Taxi" -# [9] "Motorcycle, scooter or moped" -# [10] "Driving a car or van" -# [11] "Passenger in a car or van" -# [12] "Bicycle" -# [13] "onFoot" -# [14] "Other method of travel to work" -names_new -# [1] "area_of_residence" -# [2] "area_of_workplace" -# [3] "all_categories_method_of_travel_to_work" -# [4] "work_mainly_at_or_from_home" -# [5] "underground_metro_light_rail_tram" -# [6] "train" -# [7] "bus_minibus_or_coach" -# [8] "taxi" -# [9] "motorcycle_scooter_or_moped" -# [10] "driving_a_car_or_van" -# [11] "passenger_in_a_car_or_van" -# [12] "bicycle" -# [13] "on_foot" -# [14] "other_method_of_travel_to_work" - - - names(d) -# [1] "Area of residence" -# [2] "Area of workplace" -# [3] "All categories: Method of travel to work" -# [4] "Work mainly at or from home" -# [5] "Underground, metro, light rail, tram" -# [6] "Train" -# [7] "Bus, minibus or coach" -# [8] "Taxi" -# [9] "Motorcycle, scooter or moped" -# [10] "Driving a car or van" -# [11] "Passenger in a car or van" -# [12] "Bicycle" -# [13] "On foot" -# [14] "Other method of travel to work" -names(d) = names_new -names_new[3] = "all" -names(d) = names_new -names(d) -# [1] "area_of_residence" "area_of_workplace" -# [3] "all" "work_mainly_at_or_from_home" -# [5] "underground_metro_light_rail_tram" "train" -# [7] "bus_minibus_or_coach" "taxi" -# [9] "motorcycle_scooter_or_moped" "driving_a_car_or_van" -# [11] "passenger_in_a_car_or_van" "bicycle" -# [13] "on_foot" "other_method_of_travel_to_work" - - - names(d)[5] -# [1] "underground_metro_light_rail_tram" -names(d)[5] = "metro" -# d_small = d %>% select(c(1, 2, 3, 12)) -object.size(d_small) / 1e6 -# 77.8 bytes -saveRDS(d_small, "d_small.Rds") - - - names(d_small) -# [1] "area_of_residence" "area_of_workplace" "all" -# [4] 
"bicycle" -d2 = mutate(d_small, bicycle / all) -names(d2) -# [1] "area_of_residence" "area_of_workplace" "all" -# [4] "bicycle" "bicycle/all" -d2 = mutate(d_small, pcycle = bicycle / all) -d2 -# # A tibble: 2,402,201 x 5 -# area_of_residence area_of_workplace all bicycle pcycle -# -# 1 E02000001 E020000011506 33 0.0219 -# 2 E02000001 E02000014 2 0 0 -# 3 E02000001 E02000016 3 0 0 -# 4 E02000001 E02000025 1 0 0 -# 5 E02000001 E02000028 1 0 0 -# 6 E02000001 E02000051 1 0 0 -# 7 E02000001 E02000053 2 0 0 -# 8 E02000001 E02000057 1 0 0 -# 9 E02000001 E02000058 1 0 0 -# 10 E02000001 E02000059 1 0 0 -# # … with 2,402,191 more rows -names(d)[5] = "metro" - - - - library(ukboundaries) -# Loading required package: sf -# Linking to GEOS 3.7.0, GDAL 2.3.2, PROJ 5.2.0 -# Using default data cache directory ~/.ukboundaries/cache -# Use cache_dir() to change it. -# Contains National Statistics data © Crown copyright and database right2019 -# Contains OS data © Crown copyright and database right, 2019 -# See https://www.ons.gov.uk/methodology/geography/licences - - - - \ No newline at end of file diff --git a/code-r/p5-pct.R b/code-r/p5-pct.R deleted file mode 100644 index e69de29..0000000 diff --git a/code-r/p7-ggplot2-test.R b/code-r/p7-ggplot2-test.R deleted file mode 100644 index 1195843..0000000 --- a/code-r/p7-ggplot2-test.R +++ /dev/null @@ -1,64 +0,0 @@ -crashes = stats19::get_stats19(year = 2018, type = "accidents") -casualties = stats19::get_stats19(year = 2018, type = "casualties") -head(crashes) -names(crashes) -summary(crashes$speed_limit) -summary(crashes$datetime) -plot(crashes$datetime) -head(crashes$datetime) -head(crashes$police_force) -plot(factor(crashes$police_force)) - -library(ggplot2) - -ggplot(crashes, aes(speed_limit)) + - geom_bar(aes(fill = accident_severity), position = "fill") - - -library(stplanr) -library(sf) -library(tmap) - -flowlines_sf -tm_shape(flowlines_sf) + - tm_lines() + - tm_shape(flowlines_sf) + - tm_lines(lwd = 5, col = "red", alpha = 
0.2) + - tm_shape(routes_fast_sf) + - tm_lines(col = "green") - - -# get travel data for UK regions -pct::pct_regions$region_name -mapview::mapview(pct::pct_regions) - -region_name = "cheshire" -zones_region = pct::get_pct_zones(region = region_name) -library(tmap) -tm_shape(zones_region) + tm_polygons("bicycle") - -desire_lines = pct::get_pct_lines(region = region_name) -tm_shape(zones_region) + tm_polygons("bicycle") + - tm_shape(desire_lines) + tm_lines("bicycle", palette = "viridis") - -# get crash data -crashes = stats19::get_stats19(year = 2018, type = "ac") -crashes_sf = stats19::format_sf(crashes, lonlat = TRUE) -crashes_region = crashes_sf[zones_region, ] - -desire_lines = pct::get_pct_lines(region = region_name) -tm_shape(zones_region) + tm_polygons("bicycle") + - tm_shape(desire_lines) + tm_lines("bicycle", palette = "viridis") + - tm_shape(crashes_region) + tm_dots() - -plot(desire_lines$foot, desire_lines$bus) -m = lm(foot ~ bus + e_dist_km, data = desire_lines) -m -summary(m) - -library(osmdata) -tube_network = opq("new york") %>% - add_osm_feature(key = "railway", value = "subway") %>% - osmdata_sf() - -mapview::mapview(tube_network$osm_lines) diff --git a/code-r/setup.R b/code-r/setup.R deleted file mode 100644 index dfb3c55..0000000 --- a/code-r/setup.R +++ /dev/null @@ -1,9 +0,0 @@ -source("https://raw.githubusercontent.com/ITSLeeds/go/master/code/setup_function.R") -setup_R(pkgs = c("remotes", - "sf", - "tidyverse", - "cyclestreets", - "tmap", - "pct", - "stats19", - "stplanr")); rm(setup_R) \ No newline at end of file diff --git a/code-r/st_centroid_within.R b/code-r/st_centroid_within.R deleted file mode 100644 index 5dde426..0000000 --- a/code-r/st_centroid_within.R +++ /dev/null @@ -1,30 +0,0 @@ - - -#' Return features the centroids of which are inside another object -#' -#' -#' -sf::st_within -polygons = spData::lnd -polygons_central = polygons[polygons$NAME == "City of London", ] -study_region = polygons[polygons_central, ] -study_region = 
sf::st_union(study_region) -subset_touching = polygons[study_region, ] -plot(polygons$geometry) -plot(subset_touching, col = "grey", add = TRUE) -plot(study_region, col = "red", add = TRUE) - -# Function to return only polygons whose centroids are inside -x = polygons -y = study_region -filter_polygon_centroids = function(x, y) { - x_centroids = sf::st_centroid(x) - x_in_y = sf::st_intersects(x_centroids, y) - x_in_y_logical = lengths(x_in_y) > 0 - x[x_in_y_logical, ] -} - -subset_test = filter_polygon_centroids(x = polygons, y = study_region) -plot(subset_test, col = "green", add = TRUE) - -# Test output of st_intersects.. diff --git a/code-r/test-document.Rmd b/code-r/test-document.Rmd deleted file mode 100644 index d166c7e..0000000 --- a/code-r/test-document.Rmd +++ /dev/null @@ -1,52 +0,0 @@ ---- -output: pdf_document -title: "Test document" ---- - -Hello world! - -```{r} -print("hello") -``` - - -```{r} -2 + 8 -``` - -```{python} -1 + 1 -``` - -```{r} -plot(1:9) -``` - -```{r, eval=FALSE} -library(osmdata) -data_osm = opq("leeds uk") %>% - add_osm_feature(key = "name", value = "Cycle Superhighway 1") %>% - osmdata_sf() -``` - -```{r, eval=FALSE, echo=FALSE} -saveRDS(data_osm, "data_osm_cycle_superhighway.Rds") -piggyback::pb_upload("data_osm_cycle_superhighway.Rds") -piggyback::pb_download_url("data_osm_cycle_superhighway.Rds") -``` - -The following code downloads data on the cycleway. 
-Source: OpenStreetMap.^[ -See https://www.openstreetmap.org/ -] - - -```{r} -library(tidyverse) -library(tmap) -# if the previous command fails, try: -data_osm = readRDS(url("https://github.com/ITSLeeds/TDS/releases/download/0.20.1/data_osm_cycle_superhighway.Rds")) -cycleway_100m_buffer = stplanr::geo_buffer(data_osm$osm_lines, dist = 1000) -tm_shape(data_osm$osm_lines) + tm_lines() -``` - diff --git a/code-r/test-script.R b/code-r/test-script.R deleted file mode 100644 index 508cb12..0000000 --- a/code-r/test-script.R +++ /dev/null @@ -1,36 +0,0 @@ -x = 1:9 -x_squared = x^2 -x_squared -y = 10:2 -x_plus_y = x+y -class(x_plus_y) - -x2 = x + 0.2 -class(x2) -typeof(x2) -x_character = "hello" -class(x_character) - -x_character_numeric = c(x_character, x) -class(x_character_numeric) -x2 * x -length(x2) -x2 * as.numeric(x_character_numeric[2:10]) - -d = data.frame(x_character, x, stringsAsFactors = F) -sapply(d, class) - -1:9 # Ctl+Enter - -# real world dataset -zones = pct::get_pct_zones(region = "isle-of-wight") # tab autocomplete -zones -class(zones) -plot(zones) -mapview::mapview(zones) -class(zones$geo_name) -class(zones$all) -x_all = zones$all -class(x_all) -x_all -class(zones$geometry) diff --git a/code-r/timetable-from-ics.R b/code-r/timetable-from-ics.R deleted file mode 100644 index d319fc3..0000000 --- a/code-r/timetable-from-ics.R +++ /dev/null @@ -1,24 +0,0 @@ - -library(tidyverse) -download.file("https://outlook.office365.com/owa/calendar/63f6c4e85d124df6a20656ade8e71faa@leeds.ac.uk/ce6c20fb9b724845be2e4b8449f111d912766985686605660817/calendar.ics", "tds-calendar-2022.ics") -ics = calendar::ic_read("tds-calendar-2022.ics") -names(ics) -names(ics) = gsub(pattern = ";TZID=GMT Standard Time", replacement = "", x = names(ics)) -head(ics$DTSTART) -calendar::ic_date(.Last.value) - -tt_csv = ics %>% - mutate_at(vars(matches("DT")), calendar::ic_datetime) %>% - mutate(date = as.Date(DTSTART), duration = DTEND - DTSTART) %>% - select(SUMMARY, date, duration) %>% - 
mutate(Start_time = case_when( - str_detect(SUMMARY, "Lecture|Prac") ~ "09:00", - str_detect(SUMMARY, "eminar|Dead") ~ "13:00" - )) %>% - slice(-4) # todo: remove if fixed upstream -# %>% -# filter(SUMMARY == "TDS Practical") -names(tt_csv) = tolower(names(tt_csv)) -tt_csv - -readr::write_csv(tt_csv, "timetable.csv") diff --git a/code-test/README.md b/code-test/README.md deleted file mode 100644 index af562e0..0000000 --- a/code-test/README.md +++ /dev/null @@ -1 +0,0 @@ -This is a place to put test scripts that are not maintained. diff --git a/code-test/chapter2.R b/code-test/chapter2.R deleted file mode 100644 index 162b661..0000000 --- a/code-test/chapter2.R +++ /dev/null @@ -1,31 +0,0 @@ -# intro code from chapter 2 of -# https://itsleeds.github.io/rrsrr/basics.html - -# x = 3r # this creates an error -test_object = 1:3 -test_object2 = c(1, 2, 3) -casualty_type = c("pedestrian", "cyclist", "cat") -casualty_age = seq(from = 20, to = 60, by = 20) -class(casualty_type) -class(casualty_age) -crashes = data.frame(casualty_type, casualty_age) -library(tidyverse) -ggplot(crashes) + geom_point(aes(casualty_type, casualty_age)) -casualty_type[2] -crashes[2, ] -crashes[crashes$casualty_age > 25, ] -crashes %>% - filter(casualty_age > 25) - -accidents_2020 = stats19::get_stats19(year = 2020, type = "casualties") -View(accidents_2020) -unique(accidents_2020$road_type) -accidents_2020_one_way = accidents_2020 %>% - filter(road_type == "One way street") -table(accidents_2020_one_way) - -crashes_sf = stats19::format_sf(x = accidents_2020_one_way) - -library(tmap) -tmap_mode("view") -qtm(crashes_sf) diff --git a/code-test/filling-nas-issue-120.md b/code-test/filling-nas-issue-120.md deleted file mode 100644 index 46a192f..0000000 --- a/code-test/filling-nas-issue-120.md +++ /dev/null @@ -1,8 +0,0 @@ - - -``` r -d = c("Road A", NA, NA, "Road B") -zoo::na.locf(d) -``` - - [1] "Road A" "Road A" "Road A" "Road B" diff --git a/code-test/filling-nas-issue-120.qmd 
b/code-test/filling-nas-issue-120.qmd deleted file mode 100644 index bd0c8ba..0000000 --- a/code-test/filling-nas-issue-120.qmd +++ /dev/null @@ -1,8 +0,0 @@ ---- -format: gfm ---- - -```{r} -d = c("Road A", NA, NA, "Road B") -zoo::na.locf(d) -``` \ No newline at end of file diff --git a/code-test/filling-nas-issue-120_files/libs/clipboard/clipboard.min.js b/code-test/filling-nas-issue-120_files/libs/clipboard/clipboard.min.js deleted file mode 100644 index 1103f81..0000000 --- a/code-test/filling-nas-issue-120_files/libs/clipboard/clipboard.min.js +++ /dev/null @@ -1,7 +0,0 @@ -/*! - * clipboard.js v2.0.11 - * https://clipboardjs.com/ - * - * Licensed MIT © Zeno Rocha - */ -!function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?exports.ClipboardJS=e():t.ClipboardJS=e()}(this,function(){return n={686:function(t,e,n){"use strict";n.d(e,{default:function(){return b}});var e=n(279),i=n.n(e),e=n(370),u=n.n(e),e=n(817),r=n.n(e);function c(t){try{return document.execCommand(t)}catch(t){return}}var a=function(t){t=r()(t);return c("cut"),t};function o(t,e){var n,o,t=(n=t,o="rtl"===document.documentElement.getAttribute("dir"),(t=document.createElement("textarea")).style.fontSize="12pt",t.style.border="0",t.style.padding="0",t.style.margin="0",t.style.position="absolute",t.style[o?"right":"left"]="-9999px",o=window.pageYOffset||document.documentElement.scrollTop,t.style.top="".concat(o,"px"),t.setAttribute("readonly",""),t.value=n,t);return e.container.appendChild(t),e=r()(t),c("copy"),t.remove(),e}var f=function(t){var 
e=1.anchorjs-link,.anchorjs-link:focus{opacity:1}",A.sheet.cssRules.length),A.sheet.insertRule("[data-anchorjs-icon]::after{content:attr(data-anchorjs-icon)}",A.sheet.cssRules.length),A.sheet.insertRule('@font-face{font-family:anchorjs-icons;src:url(data:n/a;base64,AAEAAAALAIAAAwAwT1MvMg8yG2cAAAE4AAAAYGNtYXDp3gC3AAABpAAAAExnYXNwAAAAEAAAA9wAAAAIZ2x5ZlQCcfwAAAH4AAABCGhlYWQHFvHyAAAAvAAAADZoaGVhBnACFwAAAPQAAAAkaG10eASAADEAAAGYAAAADGxvY2EACACEAAAB8AAAAAhtYXhwAAYAVwAAARgAAAAgbmFtZQGOH9cAAAMAAAAAunBvc3QAAwAAAAADvAAAACAAAQAAAAEAAHzE2p9fDzz1AAkEAAAAAADRecUWAAAAANQA6R8AAAAAAoACwAAAAAgAAgAAAAAAAAABAAADwP/AAAACgAAA/9MCrQABAAAAAAAAAAAAAAAAAAAAAwABAAAAAwBVAAIAAAAAAAIAAAAAAAAAAAAAAAAAAAAAAAMCQAGQAAUAAAKZAswAAACPApkCzAAAAesAMwEJAAAAAAAAAAAAAAAAAAAAARAAAAAAAAAAAAAAAAAAAAAAQAAg//0DwP/AAEADwABAAAAAAQAAAAAAAAAAAAAAIAAAAAAAAAIAAAACgAAxAAAAAwAAAAMAAAAcAAEAAwAAABwAAwABAAAAHAAEADAAAAAIAAgAAgAAACDpy//9//8AAAAg6cv//f///+EWNwADAAEAAAAAAAAAAAAAAAAACACEAAEAAAAAAAAAAAAAAAAxAAACAAQARAKAAsAAKwBUAAABIiYnJjQ3NzY2MzIWFxYUBwcGIicmNDc3NjQnJiYjIgYHBwYUFxYUBwYGIwciJicmNDc3NjIXFhQHBwYUFxYWMzI2Nzc2NCcmNDc2MhcWFAcHBgYjARQGDAUtLXoWOR8fORYtLTgKGwoKCjgaGg0gEhIgDXoaGgkJBQwHdR85Fi0tOAobCgoKOBoaDSASEiANehoaCQkKGwotLXoWOR8BMwUFLYEuehYXFxYugC44CQkKGwo4GkoaDQ0NDXoaShoKGwoFBe8XFi6ALjgJCQobCjgaShoNDQ0NehpKGgobCgoKLYEuehYXAAAADACWAAEAAAAAAAEACAAAAAEAAAAAAAIAAwAIAAEAAAAAAAMACAAAAAEAAAAAAAQACAAAAAEAAAAAAAUAAQALAAEAAAAAAAYACAAAAAMAAQQJAAEAEAAMAAMAAQQJAAIABgAcAAMAAQQJAAMAEAAMAAMAAQQJAAQAEAAMAAMAAQQJAAUAAgAiAAMAAQQJAAYAEAAMYW5jaG9yanM0MDBAAGEAbgBjAGgAbwByAGoAcwA0ADAAMABAAAAAAwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAH//wAP) format("truetype")}',A.sheet.cssRules.length)),h=document.querySelectorAll("[id]"),t=[].map.call(h,function(A){return A.id}),i=0;i\]./()*\\\n\t\b\v\u00A0]/g,"-").replace(/-{2,}/g,"-").substring(0,this.options.truncate).replace(/^-+|-+$/gm,"").toLowerCase()},this.hasAnchorJSLink=function(A){var e=A.firstChild&&-1<(" "+A.firstChild.className+" ").indexOf(" anchorjs-link "),A=A.lastChild&&-1<(" 
"+A.lastChild.className+" ").indexOf(" anchorjs-link ");return e||A||!1}}}); -// @license-end \ No newline at end of file diff --git a/code-test/filling-nas-issue-120_files/libs/quarto-html/quarto-syntax-highlighting.css b/code-test/filling-nas-issue-120_files/libs/quarto-html/quarto-syntax-highlighting.css deleted file mode 100644 index d9fd98f..0000000 --- a/code-test/filling-nas-issue-120_files/libs/quarto-html/quarto-syntax-highlighting.css +++ /dev/null @@ -1,203 +0,0 @@ -/* quarto syntax highlight colors */ -:root { - --quarto-hl-ot-color: #003B4F; - --quarto-hl-at-color: #657422; - --quarto-hl-ss-color: #20794D; - --quarto-hl-an-color: #5E5E5E; - --quarto-hl-fu-color: #4758AB; - --quarto-hl-st-color: #20794D; - --quarto-hl-cf-color: #003B4F; - --quarto-hl-op-color: #5E5E5E; - --quarto-hl-er-color: #AD0000; - --quarto-hl-bn-color: #AD0000; - --quarto-hl-al-color: #AD0000; - --quarto-hl-va-color: #111111; - --quarto-hl-bu-color: inherit; - --quarto-hl-ex-color: inherit; - --quarto-hl-pp-color: #AD0000; - --quarto-hl-in-color: #5E5E5E; - --quarto-hl-vs-color: #20794D; - --quarto-hl-wa-color: #5E5E5E; - --quarto-hl-do-color: #5E5E5E; - --quarto-hl-im-color: #00769E; - --quarto-hl-ch-color: #20794D; - --quarto-hl-dt-color: #AD0000; - --quarto-hl-fl-color: #AD0000; - --quarto-hl-co-color: #5E5E5E; - --quarto-hl-cv-color: #5E5E5E; - --quarto-hl-cn-color: #8f5902; - --quarto-hl-sc-color: #5E5E5E; - --quarto-hl-dv-color: #AD0000; - --quarto-hl-kw-color: #003B4F; -} - -/* other quarto variables */ -:root { - --quarto-font-monospace: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; -} - -pre > code.sourceCode > span { - color: #003B4F; -} - -code span { - color: #003B4F; -} - -code.sourceCode > span { - color: #003B4F; -} - -div.sourceCode, -div.sourceCode pre.sourceCode { - color: #003B4F; -} - -code span.ot { - color: #003B4F; - font-style: inherit; -} - -code span.at { - color: #657422; - font-style: inherit; -} - -code span.ss { 
- color: #20794D; - font-style: inherit; -} - -code span.an { - color: #5E5E5E; - font-style: inherit; -} - -code span.fu { - color: #4758AB; - font-style: inherit; -} - -code span.st { - color: #20794D; - font-style: inherit; -} - -code span.cf { - color: #003B4F; - font-style: inherit; -} - -code span.op { - color: #5E5E5E; - font-style: inherit; -} - -code span.er { - color: #AD0000; - font-style: inherit; -} - -code span.bn { - color: #AD0000; - font-style: inherit; -} - -code span.al { - color: #AD0000; - font-style: inherit; -} - -code span.va { - color: #111111; - font-style: inherit; -} - -code span.bu { - font-style: inherit; -} - -code span.ex { - font-style: inherit; -} - -code span.pp { - color: #AD0000; - font-style: inherit; -} - -code span.in { - color: #5E5E5E; - font-style: inherit; -} - -code span.vs { - color: #20794D; - font-style: inherit; -} - -code span.wa { - color: #5E5E5E; - font-style: italic; -} - -code span.do { - color: #5E5E5E; - font-style: italic; -} - -code span.im { - color: #00769E; - font-style: inherit; -} - -code span.ch { - color: #20794D; - font-style: inherit; -} - -code span.dt { - color: #AD0000; - font-style: inherit; -} - -code span.fl { - color: #AD0000; - font-style: inherit; -} - -code span.co { - color: #5E5E5E; - font-style: inherit; -} - -code span.cv { - color: #5E5E5E; - font-style: italic; -} - -code span.cn { - color: #8f5902; - font-style: inherit; -} - -code span.sc { - color: #5E5E5E; - font-style: inherit; -} - -code span.dv { - color: #AD0000; - font-style: inherit; -} - -code span.kw { - color: #003B4F; - font-style: inherit; -} - -.prevent-inlining { - content: ".tippy-arrow{bottom:0}.tippy-box[data-placement^=top]>.tippy-arrow:before{bottom:-7px;left:0;border-width:8px 8px 0;border-top-color:initial;transform-origin:center top}.tippy-box[data-placement^=bottom]>.tippy-arrow{top:0}.tippy-box[data-placement^=bottom]>.tippy-arrow:before{top:-7px;left:0;border-width:0 8px 
8px;border-bottom-color:initial;transform-origin:center bottom}.tippy-box[data-placement^=left]>.tippy-arrow{right:0}.tippy-box[data-placement^=left]>.tippy-arrow:before{border-width:8px 0 8px 8px;border-left-color:initial;right:-7px;transform-origin:center left}.tippy-box[data-placement^=right]>.tippy-arrow{left:0}.tippy-box[data-placement^=right]>.tippy-arrow:before{left:-7px;border-width:8px 8px 8px 0;border-right-color:initial;transform-origin:center right}.tippy-box[data-inertia][data-state=visible]{transition-timing-function:cubic-bezier(.54,1.5,.38,1.11)}.tippy-arrow{width:16px;height:16px;color:#333}.tippy-arrow:before{content:"";position:absolute;border-color:transparent;border-style:solid}.tippy-content{position:relative;padding:5px 9px;z-index:1} \ No newline at end of file diff --git a/courses/2day-test-code.R b/courses/2day-test-code.R deleted file mode 100644 index 3124918..0000000 --- a/courses/2day-test-code.R +++ /dev/null @@ -1,55 +0,0 @@ -# The following code should work on every computer - -# test ability to install packages -install.packages("remotes") - -# install packages we'll use (remotes is more efficient at installing them) -pkgs = c( - "pct", - "stats19", - "stplanr", - "tidyverse", - "sf", - "tmap", - "dodgr", - "osmdata", - "pbapply" -) -remotes::install_cran(pkgs) - -# load the pkgs -lapply(pkgs, library, character.only = TRUE) -tmap_mode("plot") # use "view" for interactive maps - -# Test link with osmdata works: -osm_data = opq(bbox = "westminster") %>% - add_osm_feature(key = "name", value = "Horseferry Road") %>% - osmdata_sf() -horseferry_road = osm_data$osm_lines - -qtm(horseferry_road) -horseferry_region = horseferry_road %>% - st_transform(27700) %>% - st_buffer(500) %>% - st_union() %>% - st_transform(4326) - -# Test stats19 data downloads -a = get_stats19(year = 2017, type = "acc", ask = FALSE) -asf = format_sf(a, lonlat = TRUE) -horseferry_crashes = asf[horseferry_region, ] -plot(horseferry_crashes) - -# Test pct data 
downloads -rnet = get_pct_rnet(region = "london") -horseferry_routenet = rnet[horseferry_region, ] - -# Final combined plot -tm_shape(horseferry_region) + - tm_borders() + - tm_shape(horseferry_road) + - tm_lines("red", scale = 9) + - tm_shape(horseferry_routenet) + - tm_lines(lwd = "bicycle", scale = 9, col = "blue") + - tm_shape(horseferry_crashes) + - tm_dots("accident_severity", size = 0.5, alpha = 0.5, palette = "magma") diff --git a/courses/2day.Rmd b/courses/2day.Rmd deleted file mode 100644 index c4e2cb5..0000000 --- a/courses/2day.Rmd +++ /dev/null @@ -1,404 +0,0 @@ ---- -title: "2 day course: R for Spatial Transport Data" -output: github_document -bibliography: ../tds.bib ---- - -See https://github.com/ITSLeeds/TDS - -Assumed prior knowledge: - -- Working knowledge of R, e.g. have completed: - - Introduction to R free DataCamp course: https://www.datacamp.com/courses/free-introduction-to-r - - Recommended reading: Section 4.2 of *Efficient R Programming* [@gillespie_efficient_2016]: https://csgillespie.github.io/efficientR/efficient-workflow.html#package-selection - -- Some knowledge of tidyverse is highly recommended, e.g. already know the contents of or have read-up on **and tried examples from** the following 'cheatsheets': - - Tidyverse for Beginners DataCamp Cheat Sheet (see [here](https://s3.amazonaws.com/assets.datacamp.com/blog_assets/Tidyverse+Cheat+Sheet.pdf)). 
**At a minimum you will have completed up to Chapter 5 on this online course** (this may take ~2 hours) - - Data wrangling with dplyr and tidyr RStudio cheet cheet (see [here](https://www.rstudio.com/wp-content/uploads/2015/02/data-wrangling-cheatsheet.pdf)) - - - -## Computing Requirements - -- DfT will provide desktops with recent versions of R and RStudio installed, including: - - R 3.5.3 recommended (although any R 3.5 release should work): https://cran.r-project.org/ - - RStudio 1.1.436 - see https://www.rstudio.com/products/rstudio/download/ - - Will require up-to-date versions of **tidyverse** and **sf** packages, at a minimum - - Other packages we'll use: - - stats19 - - stplanr - - tmap - - devtools - - The GitHub package spDataLarge, which can be installed as follows: - -```{r, eval=FALSE} -devtools::install_github("Nowosad/spDataLarge") -``` - -- Own laptops allowed, provided necessary data is installed - -- Data: all data will be either provided by packages or downloaded on the day (assuming good internet) - - A test to check if data downloads work is accessing stats19 data (check this works): - -```{r, eval=FALSE} -crashes = stats19::get_stats19(year = 2017, type = "accidents", ask = FALSE) -``` - -- A script to test set-up will be provided to test it all works - -## Venue and course times - -### Day 1: Introduction to R/RStudio - -Course times each day: - -- 09:30 - 10:00 (set-up) - -- 10:00 - 11:00 How to use R/RStudio effectively (MM) - -- break - -- 11:15 - 12:30 Using packages: example with sf/ggplot2 (RL) - -- 12:30 - 13:30: lunch - -- 13:30 - 14:45 Spatial data analysis (MM) - -- break - -- 15:00 - 16:00 Visualising spatial datasets (RL) - -- 16:00 - 16:30 (Q&A) - -### Day 2: - -Course times each day: - -- 09:30 - 11:00 stats19 data analysis - with spatial/temporal analysis (RL) - -- break - -- 11:15 - 12:30 OD data with stplanr (RL) - -- 12:30 - 13:30: lunch - -- 13:30 - 14:45 Local route network analysis (MM) - -- break - -- 15:00 - 16:00 Data 
and methods for assessing cycling potential (RL) - -- 16:00 - 16:30 (Q&A) - - - -## Optional extra reading - -- Optional extra reading: for context, you may want to read-up on: - - Overview of GIS in R, e.g. in Chapter 1 of *Geocomputation with R* or this blog post: https://www.jessesadler.com/post/gis-with-r-intro/ - - stplanr: A package for transport planning [@lovelace_stplanr:_2018] - - R for data science [@grolemund_r_2016] - - For an overview of spatial transport data types, see Chapter 12 of *Geocomputation with R* [@lovelace_geocomputation_2019]: http://geocompr.robinlovelace.net/transport.html - -## Notes - -The overview slides for the course can be found here: https://itsleeds.github.io/TDS/slides/2day-slides#1 - -### stats19 exercises - -1. Download and plot all crashes reported in Great Britain in 2017 (hint: see [the stats19 vignette](https://cran.r-project.org/web/packages/stats19/vignettes/stats19.html)) -1. Find the function in the `stats19` package that converts a `data.frame` object into an `sf` data frame. Use this function to convert the road crashes into an `sf` object, called `crashes_sf`, for example. -1. Filter crashes that happened in the Isle of Wight based on attribute data (hint: the relevant column contains the word `local`) -1. Filter crashes happened in the Isle of Wight using geographic subsetting (hint: remember `st_crs()`?) -1. Bonus: Which type of spatial subsetting yielded more results and why? -1. Bonus: how many crashes happened in each zone? -1. Create a new column called `month` in the crash data using the function `lubridate::month()` and the `date` column. -1. Create an object called `a_iow_may` representing all the crashes that happened in the Isle of Wight in the month of May -1. 
Bonus: Calculate the average (`mean`) speed limit associated with each crash that happened in May across the zones of the Isle of Wight (the result is shown in the map) - - -Short keys: - -- Alt-Shift-K: shows short keys -- Tab: does autocompletions -- Ctl-Shift-A: format code - -Link to transportAPI: https://developer.transportapi.com/signup - -Link to chapter: https://geocompr.robinlovelace.net/transport.html - -## Example code - -### From the morning of day 1 - -```{r, eval=FALSE} -library(pct) - -x = 1:5 -y = c(0,1,3,9,18) -# Ctl+2 - -cat = data.frame( - name = c("Tiddles", "Chester", "Shadow"), - type = c("Tabby", "Persian", "Siamese"), - age = c(1, 3, 5), - likes_milk = c(TRUE, FALSE, TRUE), - stringsAsFactors = FALSE -) -class(cat$name) -even_numbers = seq(from = 2, to = 4000, by = 2) -random_letters = sample(letters, size = 100, replace = TRUE) - -iow = pct::get_pct_zones(region = "isle-of-wight") -class(iow) -dim(iow) -iow = iow[1:9] -iow_geo = iow$geometry -plot(iow_geo) -plot(iow) -number_who_walk = iow$foot -class(number_who_walk) -summary(number_who_walk) -number_who_walk[c(1, 3, 9)] -sel = number_who_walk > 500 -number_who_walk[sel] -length(sel) -class(sel) -iow$many_walk = sel -iow_walk = iow[iow$many_walk, ] - -l = get_pct_lines("isle-of-wight") -l$percent_drive = - l$car_driver / l$all * 100 -dim(l) - -summary(l$rf_dist_km) -# identify short routes -sel = l$rf_dist_km < 3 -l_short = l[sel, ] - - -plot(l$geometry) -plot(l_short$geometry, add = TRUE, col = "red") - -l_order = l_short[order(l_short$percent_drive), ] -mapview::mapview(l_order[nrow(l_order), ]) - -library(dplyr) -l_short2 = l %>% - filter(rf_dist_km < 3) %>% - mutate(pdrive = car_driver / all) %>% - top_n(n = 3, wt = pdrive) -mapview::mapview(l_short2$geometry) - -``` - -### Afternoon of day 1 - -```{r, eval=FALSE} -library(sf) -library(pct) -library(spData) -#install.packages("spData") - -iow = get_pct_zones("isle-of-wight") -iow = st_transform(iow, 27700) -iow2 = iow[1,] -cents = 
get_pct_centroids("isle-of-wight", - geography = "lsoa") -cents = st_transform(cents, 27700) -cent2 = cents[iow2,] -plot(cents$geometry) -plot(iow$geometry) -plot(cent2, col = "red", add = TRUE) -cent3 = cents[iow2,, op = st_disjoint] -plot(cent3, col = "blue", add = TRUE) - - -plot(nz$geom) -nz - -nz_islands = nz %>% - group_by(Island) %>% - summarise(Population = sum(Population)) -plot(nz_islands) - -cents_buff = st_buffer(cents, 10000) -plot(cents_buff$geometry) -plot(cents$geometry, col = "red", add = T) - - -canterbury = nz[nz$Name == "Canterbury",] -cant_height = nz_height[canterbury,] - -nz_height2 = st_join(nz_height, nz) - -nz_height3 = nz_height2 %>% - group_by(Name) %>% - summarise(numb_mountain = n()) %>% - select(Name, numb_mountain) %>% - st_drop_geometry() - -nz_joined = left_join(nz["Name"], nz_height3) -plot(nz_joined) - -nz_agg = aggregate(nz_height[1], nz, FUN = length) -plot(nz_agg) - - - -nrow(cant_height) -nz$geom = canterbury$geom -plot(nz) -nz = spData::nz -``` - -### Code to download and visualise geo data - -```{r, eval=FALSE} -u = "https://opendata.arcgis.com/datasets/66f41d4ccc8a4fce9137b3a1947bfcdb_0.kml?outSR=%7B%22wkid%22%3A27700%2C%22latestWkid%22%3A27700%7D" -download.file(url = u, destfile = "d.kml") -f = list.files(pattern = "kml") -s = read_sf(f) -plot(s$geometry) -nrow(s) -mapview::mapview(s) -s_simple = rmapshaper::ms_simplify(input = s, 0.1) -object.size(s) -object.size(s_simple) -mapview::mapview(s_simple) -?rmapshaper::ms_simplify - -library(spData) -library(tmap) - -tmap_mode("plot") -tm_shape(nz) + - tm_fill( - "Population", - palette = "RdYlBu", - alpha = 0.2) + - tm_shape(nz_height) + - tm_dots() -class(m) -tmap_save(m, "m.html") - -mapview::mapview(nz) - -library(ggplot2) -nz$geometry = nz$geom -ggplot(nz) + - geom_sf() - -``` - - -### Code from the morning of day 2 - -```{r, eval=FALSE} -library(tidyverse) - -iow = pct::get_pct_zones("isle-of-wight") -class(iow) -iow$is_small = iow$all < 3000 -iow_small = 
iow[iow$is_small, ] -nrow(iow_small) - -iow_small2 = iow %>% - filter(all < 3000) - -nrow(iow_small) == nrow(iow_small2) -identical(iow_small, iow_small2) - -iow_mutated = iow %>% - mutate(is_small = all < 3000) %>% - filter(is_small) - -identical(iow_small2, iow_mutated) - -median_car = median(iow$car_driver) - -# logical vector -sel_high_car = iow$car_driver > median_car - -iow_high_car = mutate( - iow, - sel_high_car = car_driver > median_car - ) -names(iow_high_car) - -iow_min = iow_high_car %>% - select(1:9, sel_high_car) %>% - sf::st_drop_geometry() - -iow_min %>% pull(geo_name) - -iow_min %>% - mutate(n1 = str_detect(iow$geo_name, "1")) %>% - pull(n1) %>% - table() - -table(str_detect(iow$geo_name, "1")) - -iow_foot = iow %>% - select(foot) - -plot(iow_foot) -library(tmap) -tm_shape(iow_foot) + - tm_polygons("foot") - - -iow %>% - filter(car_driver > median(car_driver)) - - - -iow_high_car = iow_min %>% - filter(car_driver > median_car) -iow_small_car_high = iow_high_car %>% - mutate(is_small = all < 3000) %>% - filter(is_small) -nrow(iow_small_car_high) - - - - - - -iow = pct::get_pct_zones("isle-of-wight") - -iow_small_high_car = iow %>% - filter(car_driver > median(car_driver)) %>% - filter(all < 3000) - -grepl("cat", c("cat", "dog", "cat2")) -str_detect(c("cat", "dog", "cat2"), "cat") - -library(stats19) -crashes_raw = get_stats19(2017, "ac", ask = FALSE, format = FALSE) -crashes_raw -crashes = get_stats19(2017, "ac", ask = FALSE) -dim(crashes) -class(crashes) -crashes - -# remove na values -crashes_no_na = crashes %>% - filter(!is.na(longitude) & !is.na(latitude)) - -crashes_sf2 = st_as_sf( - crashes_no_na, - coords = c("longitude", "latitude") - ) - -# quicker with function -crashes_sf = format_sf(crashes) -``` - - -## References - - - - diff --git a/courses/2day.md b/courses/2day.md deleted file mode 100644 index 28eec1c..0000000 --- a/courses/2day.md +++ /dev/null @@ -1,357 +0,0 @@ -2 day course: R for Spatial Transport Data -================ - -See 
- -Assumed prior knowledge: - - - Working knowledge of R, e.g. have completed: - - Introduction to R free DataCamp course: - - - Recommended reading: Section 4.2 of *Efficient R Programming* - (Gillespie and Lovelace 2016): - - - Some knowledge of tidyverse is highly recommended, e.g. already know - the contents of or have read-up on **and tried examples from** the - following ‘cheatsheets’: - - Tidyverse for Beginners DataCamp Cheat Sheet (see - [here](https://s3.amazonaws.com/assets.datacamp.com/blog_assets/Tidyverse+Cheat+Sheet.pdf)). - **At a minimum you will have completed up to Chapter 5 on this - online course** (this may take ~2 hours) - - Data wrangling with dplyr and tidyr RStudio cheet cheet (see - [here](https://www.rstudio.com/wp-content/uploads/2015/02/data-wrangling-cheatsheet.pdf)) - - - -## Computing Requirements - - - DfT will provide desktops with recent versions of R and RStudio - installed, including: - - R 3.5.3 recommended (although any R 3.5 release should work): - - - RStudio 1.1.436 - see - - - Will require up-to-date versions of **tidyverse** and **sf** - packages, at a minimum - - Other packages we’ll use: - - stats19 - - stplanr - - tmap - - devtools - - The GitHub package spDataLarge, which can be installed as - follows: - - - -``` r -devtools::install_github("Nowosad/spDataLarge") -``` - - - Own laptops allowed, provided necessary data is installed - - - Data: all data will be either provided by packages or downloaded on - the day (assuming good internet) - - - A test to check if data downloads work is accessing stats19 data - (check this -works): - - - -``` r -crashes = stats19::get_stats19(year = 2017, type = "accidents", ask = FALSE) -``` - - - A script to test set-up will be provided to test it all works - -## Venue and course times - -### Day 1: Introduction to R/RStudio - -Course times each day: - - - 09:30 - 10:00 (set-up) - - - 10:00 - 11:00 How to use R/RStudio effectively (MM) - - - break - - - 11:15 - 12:30 Using packages: 
example with sf/ggplot2 (RL) - - - 12:30 - 13:30: lunch - - - 13:30 - 14:45 Spatial data analysis (MM) - - - break - - - 15:00 - 16:00 Visualising spatial datasets (RL) - - - 16:00 - 16:30 (Q\&A) - -### Day 2: - -Course times each day: - - - 09:30 - 11:00 stats19 data analysis - with spatial/temporal analysis - (RL) - - - break - - - 11:15 - 12:30 OD data with stplanr (RL) - - - 12:30 - 13:30: lunch - - - 13:30 - 14:45 Local route network analysis (MM) - - - break - - - 15:00 - 16:00 Data and methods for assessing cycling potential (RL) - - - 16:00 - 16:30 (Q\&A) - - - -## Optional extra reading - - - Optional extra reading: for context, you may want to read-up on: - - Overview of GIS in R, e.g. in Chapter 1 of *Geocomputation with - R* or this blog post: - - - stplanr: A package for transport planning (Lovelace and Ellison - 2018) - - R for data science (Grolemund and Wickham 2016) - - For an overview of spatial transport data types, see Chapter 12 - of *Geocomputation with R* - (**???**): - - -## Notes - -The overview slides for the course can be found here: - - -### stats19 exercises - -1. Download and plot all crashes reported in Great Britain in 2017 - (hint: see [the stats19 - vignette](https://cran.r-project.org/web/packages/stats19/vignettes/stats19.html)) -2. Find the function in the `stats19` package that converts a - `data.frame` object into an `sf` data frame. Use this function to - convert the road crashes into an `sf` object, called `crashes_sf`, - for example. -3. Filter crashes that happened in the Isle of Wight based on attribute - data (hint: the relevant column contains the word `local`) -4. Filter crashes happened in the Isle of Wight using geographic - subsetting (hint: remember `st_crs()`?) -5. Bonus: Which type of spatial subsetting yielded more results and - why? -6. Bonus: how many crashes happened in each zone? -7. Create a new column called `month` in the crash data using the - function `lubridate::month()` and the `date` column. -8. 
Create an object called `a_iow_may` representing all the crashes - that happened in the Isle of Wight in the month of May -9. Bonus: Calculate the average (`mean`) speed limit associated with - each crash that happened in May across the zones of the Isle of - Wight (the result is shown in the map) - -Short keys: - - - Alt-Shift-K: shows short keys - - Tab: does autocompletions - - Ctl-Shift-A: format code - -Link to transportAPI: - -Link to chapter: - -## Example code - -### From the morning of day 1 - -``` r -library(pct) - -x = 1:5 -y = c(0,1,3,9,18) -# Ctl+2 - -cat = data.frame( - name = c("Tiddles", "Chester", "Shadow"), - type = c("Tabby", "Persian", "Siamese"), - age = c(1, 3, 5), - likes_milk = c(TRUE, FALSE, TRUE), - stringsAsFactors = FALSE -) -class(cat$name) -even_numbers = seq(from = 2, to = 4000, by = 2) -random_letters = sample(letters, size = 100, replace = TRUE) - -iow = pct::get_pct_zones(region = "isle-of-wight") -class(iow) -dim(iow) -iow = iow[1:9] -iow_geo = iow$geometry -plot(iow_geo) -plot(iow) -number_who_walk = iow$foot -class(number_who_walk) -summary(number_who_walk) -number_who_walk[c(1, 3, 9)] -sel = number_who_walk > 500 -number_who_walk[sel] -length(sel) -class(sel) -iow$many_walk = sel -iow_walk = iow[iow$many_walk, ] - -l = get_pct_lines("isle-of-wight") -l$percent_drive = - l$car_driver / l$all * 100 -dim(l) - -summary(l$rf_dist_km) -# identify short routes -sel = l$rf_dist_km < 3 -l_short = l[sel, ] - - -plot(l$geometry) -plot(l_short$geometry, add = TRUE, col = "red") - -l_order = l_short[order(l_short$percent_drive), ] -mapview::mapview(l_order[nrow(l_order), ]) - -library(dplyr) -l_short2 = l %>% - filter(rf_dist_km < 3) %>% - mutate(pdrive = car_driver / all) %>% - top_n(n = 3, wt = pdrive) -mapview::mapview(l_short2$geometry) -``` - -### Afternoon of day 1 - -``` r -library(sf) -library(pct) -library(spData) -#install.packages("spData") - -iow = get_pct_zones("isle-of-wight") -iow = st_transform(iow, 27700) -iow2 = iow[1,] 
-cents = get_pct_centroids("isle-of-wight", - geography = "lsoa") -cents = st_transform(cents, 27700) -cent2 = cents[iow2,] -plot(cents$geometry) -plot(iow$geometry) -plot(cent2, col = "red", add = TRUE) -cent3 = cents[iow2,, op = st_disjoint] -plot(cent3, col = "blue", add = TRUE) - - -plot(nz$geom) -nz - -nz_islands = nz %>% - group_by(Island) %>% - summarise(Population = sum(Population)) -plot(nz_islands) - -cents_buff = st_buffer(cents, 10000) -plot(cents_buff$geometry) -plot(cents$geometry, col = "red", add = T) - - -canterbury = nz[nz$Name == "Canterbury",] -cant_height = nz_height[canterbury,] - -nz_height2 = st_join(nz_height, nz) - -nz_height3 = nz_height2 %>% - group_by(Name) %>% - summarise(numb_mountain = n()) %>% - select(Name, numb_mountain) %>% - st_drop_geometry() - -nz_joined = left_join(nz["Name"], nz_height3) -plot(nz_joined) - -nz_agg = aggregate(nz_height[1], nz, FUN = length) -plot(nz_agg) - - - -nrow(cant_height) -nz$geom = canterbury$geom -plot(nz) -nz = spData::nz -``` - -### Code to download and visualise geo data - -``` r -u = "https://opendata.arcgis.com/datasets/66f41d4ccc8a4fce9137b3a1947bfcdb_0.kml?outSR=%7B%22wkid%22%3A27700%2C%22latestWkid%22%3A27700%7D" -download.file(url = u, destfile = "d.kml") -f = list.files(pattern = "kml") -s = read_sf(f) -plot(s$geometry) -nrow(s) -mapview::mapview(s) -s_simple = rmapshaper::ms_simplify(input = s, 0.1) -object.size(s) -object.size(s_simple) -mapview::mapview(s_simple) -?rmapshaper::ms_simplify - -library(spData) -library(tmap) - -tmap_mode("plot") -tm_shape(nz) + - tm_fill( - "Population", - palette = "RdYlBu", - alpha = 0.2) + - tm_shape(nz_height) + - tm_dots() -class(m) -tmap_save(m, "m.html") - -mapview::mapview(nz) - -library(ggplot2) -nz$geometry = nz$geom -ggplot(nz) + - geom_sf() -``` - -## References - -

- -
- -Gillespie, Colin, and Robin Lovelace. 2016. *Efficient R Programming: A -Practical Guide to Smarter Programming*. O’Reilly Media. -. - -
- -
- -Grolemund, Garrett, and Hadley Wickham. 2016. *R for Data Science*. 1 -edition. O’Reilly Media. - -
- -
- -Lovelace, Robin, and Richard Ellison. 2018. “Stplanr: A Package for -Transport Planning.” *The R Journal* 10 (2): 7–23. -. - -
- -
diff --git a/courses/2day/01 - How to use R.Rmd b/courses/2day/01 - How to use R.Rmd deleted file mode 100644 index 85a1957..0000000 --- a/courses/2day/01 - How to use R.Rmd +++ /dev/null @@ -1,284 +0,0 @@ ---- -title: "How to use R/RStudio effectively" -output: github_document ---- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) -``` - -## Learning Outcomes - -- RStudio UI and main features -- R Objects and Functions -- Subsetting -- Plotting -- Help -- Installing Packages - -## Welcome to RStudio - -This course assumes that you have already got some basic knowledge of working with R. If you don't see the [prerequisites](https://github.com/ITSLeeds/TDS/blob/master/courses/2day.md). - -```{r rstudioui, echo=FALSE} -knitr::include_graphics("images/rstudio-ui.png") -``` - -### Projects - -Projects are a way to organise related work together. Each project has its own folder and Rproj file. - -Start a new project with: - -> File > New Project - -You can choose to create a new directory (folder) or associate a project with an existing directory. Make a new project called TDS and save it in a sensible place on your computer. Notice that TDS now appears in the top right of RStudio. - -**Always do your work within a project** - -### R Scripts - -We could simply type all our code into the console, but that would require us to retype all our code every time we wish to run it. So we usually save code in a script file (with the .R extension). - -Make a new script: - -> Flie > New File > Rscript - -Or use the new script button on the toolbar. - -Save the script and give it a sensible name like `TDS-lesson-1.R` with: - -> File > Save - -Or the save button on the toolbar. - -### Writing Code - -Let's start with some basic R operations - -```{r, eval=FALSE} -x <- 1:5 -y <- c(0,1,3,9,18) -plot(x, y) -``` - -This code creates two objects, both vectors of length == 5, and then plots them. - - -### Running Code - -We have several ways to run code within a script. 
- -1. Place the cursor on a line of code and press `CTRL + Enter` to run that line of code. -1. Highlight a block of code or part of a line of code and press `CTRL + Enter` to run the highlighted code. -1. Press `CTRL + Shift + Enter` to run all the code in a script. -1. Press the Run button on the toolbar to run all the code in a script. -1. Use the function `source()` to run all the code in a script e.g. `source("TDS-lesson-1.R")` - -### Vewing Objects - -Lets create some different types of object: - -```{r, eval=FALSE} -cat <- data.frame(name = c("Tiddles", "Chester", "Shadow"), - type = c("Tabby", "Persian", "Siamese"), - age = c(1, 3, 5), - likes_milk = c(TRUE, FALSE,TRUE)) -even_numbers <- seq(from = 2, to = 4000, by = 2) -random_letters <- sample(letters, size = 100, replace = TRUE) -small_matrix <- matrix(1:24, nrow = 12) -``` - -We can view the objects in a range of ways: - -1. Type the name of the object into the console e.g. `cat`, what happens if we try to view all 2000 even_numbers? -1. Use the `head()` function to view the first few values e.g. `head(even_numbers)` -1. Use the view table button next to matrix or data.frame objects in the environment tab. - -We can also get an overview of an object using a range of functions. - -1. `summary()` -1. `class()` -1. `class()` -1. `dim()` -1. `length()` - -**Exercise** try these functions, what results do they give? - -### Using Autocomplete - -RStudio can help you write code by autocompleting. RStudio will look for similar objects and functions after typing the first three letters of a name. - -```{r autocomp, echo=FALSE} -knitr::include_graphics("images/autocomplete.jpg") -``` - -When there is more than one option you can select from the list using the mouse or arrow keys. - -Within a function you can get a list of arguments by pressing Tab. - -```{r help, echo=FALSE} -knitr::include_graphics("images/fucntionhelp.jpg") -``` - -Notice the help popup. 
- -### Getting help - -Every function in R has a help page. You can view the help using `?` for example `?sum`. Many packages also contain vignettes, these are long form help documents containing examples and guides. `vignette()` will show a list of all the vignettes available, or you can show a specific vignette for example `vignette(topic = "sf1", package = "sf")`. - -### Commenting Code - -It is good practice to use comments in your code to explain what your code does. You can comment code using `#` - -For example: - -```{r, eval=FALSE} -# A whole line comment -x <- 1:5 # An inline comment -y <- x * 2 -``` - -You can comment a whole block of text by selecting it and using CTRL + Shift + C - -You can add a comment section using CTRL + Shift + R - - -### Cleaning your environment and removing objects - -The Environment tab shows all the objects in your environment, this includes Data, Values, and Functions. By default, new objects appear in the Global Environment but you can see other environments with the drop-down menu. For example, each package has its own environment. - -Sometimes you wish to remove things from your environment, perhaps because you no longer need them or things are getting cluttered. - -You can remove an object with the `rm()` function e.g. `rm(x)` or `rm(x,y)` or you can clear your whole environment with the broom button on the Environment Tab. - -### Debugging Code - -This code example will run, but we can see some of RStudio's debugging features by changing it. See that when the bracket is removed the red X and the underlying highlight the broken code. You may need to save the code you see the debugging prompt. - -```{r debug, echo=FALSE} -knitr::include_graphics("images/debug.jpg") -``` - -**Always address debugging prompts before running your code** - - -### Saving your work - -We have already seen that you can save an R script. You can also save R objects in the RDS format. 
- -```{r, eval=FALSE} -saveRDS(cat,"cat.Rds") -``` -We can also read back in our data. - -```{r, eval=FALSE} -cat2 <- readRDS("cat.Rds") -identical(cat, cat2) -``` - -R also supports many other formats. For example CSV files. - -```{r, eval=FALSE} -write.csv(cat, "cat.csv") -cat3 <- read.csv("cat.csv") -identical(cat3, cat) -``` -Notice that `cat3` and `cat` are not identical, what has changed? Hint: use `?write.csv`. - -### Subsetting - -We can subset any R object to just get part of the object. Subsetting can be done by either providing the positional numbers of the subset or logical vector of the same length. For two dimension object such as matrices and data.frames you can subset by row or column. -Subsetting is done using square brackets `[]` after the name of an object. - -```{r, eval=FALSE} -even_numbers[1:5] # Just the first five even_numbers -x[c(TRUE, FALSE,TRUE,FALSE,TRUE)] # The 1st, 3rd, and 5th element in x -cat[c(1,2),] # First and second row of cat -cat[,c(1,3)] # First and third column of cat -cat[,c("name","age")] # First and third column of cat by name -``` - -It is also possible to create logial vector for subsetting by creating a query -```{r, eval=FALSE} -x[x == 5] # Only when x == 5 (notice the use of double equals) -even_numbers[even_numbers < 50] # Just the even_numbers less than 50 -even_numbers[even_numbers %% 9 == 0] # Just the even_numbers that are a multiple of 9 -cat[cat$name == "Tiddles",] # THe rows where the name is Tiddles (notice the use of $) -``` - -### Dealing with NAs - -R object can have a value of NA. This is how R represents missing data. - -```{r, eval=FALSE} -z <- c(4,5,NA,7) -``` - -NA values are common in real-world data but can cause trouble, for example - -```{r, eval=FALSE} -sum(z) # Result is NA -``` - -Some functions can be told to ignore NA values. 
- -```{r, eval=FALSE} -sum(z, na.rm = TRUE) # Result is equal to 4 + 5 + 7 -``` - -You can find NAs using the `is.na()` function, and then remove them - -```{r, eval=FALSE} -is.na(z) -z_nona <- z[!is.na(z)] #Note the use of the not operator ! -sum(z) -``` - -Be careful of NAs especially in statistical analysis, for example, the average of a value excluding NAs may not be representative of the whole. - -### Packages - -R has lots of functionality built in, but the real value in R is the community of package developers. Packages add new functions to R. Some packages are so useful they have become almost essential while others are only used for specific purposes. - -There are two stages to using a package. - -#### Instaling a package. - -Packages that you don't have on your computer can be installed using `install.packages()` Packages come from [The Comprehensive R Archive Network](https://cran.r-project.org/) there are over 10,000 packages on CRAN. You only need to install a package once. - -**Note: it is bad practice to install packages within a script, as installing packages each time it runs risks corrupting the package** - -```{r, eval=FALSE} -install.packages("sf") -``` - -Once you have a package on your computer you need to add it to your current environment - -```{r, eval=FALSE} -library(sf) -``` - -It is good practice to shart your script by loading any packages that are required. - -There will be lots more about packages in the next session. 
- -## Now you are ready to use R - - -```{r smile} -eyes <- c(2.3,4,3.7,4) -eyes <- matrix(eyes, ncol = 2, byrow = T) -mouth <- c(2,2,2.5,1.3,3,1,3.5,1.3,4,2) -mouth <- matrix(mouth, ncol = 2, byrow = T) -plot(eyes, - type = "p", - main = "Smile you're using R", - cex = 2, - xlim = c(0,5), - ylim = c(0,5)) -lines(mouth, - type = "l", - col = "red") -``` - diff --git a/courses/2day/01_-_How_to_use_R.md b/courses/2day/01_-_How_to_use_R.md deleted file mode 100644 index 20115e4..0000000 --- a/courses/2day/01_-_How_to_use_R.md +++ /dev/null @@ -1,272 +0,0 @@ -How to use R/RStudio effectively -================ - -Learning Outcomes ------------------ - -- RStudio UI and main features -- R Objects and Functions -- Subsetting -- Plotting -- Help -- Installing Packages - -Welcome to RStudio ------------------- - -This course assumes that you have already got some basic knowledge of working with R. If you don't see the [prerequisites](https://github.com/ITSLeeds/TDS/blob/master/courses/2day.md). - - - -### Projects - -Projects are a way to organise related work together. Each project has its own folder and Rproj file. - -Start a new project with: - -> File > New Project - -You can choose to create a new directory (folder) or associate a project with an existing directory. Make a new project called TDS and save it in a sensible place on your computer. Notice that TDS now appears in the top right of RStudio. - -**Always do your work within a project** - -### R Scripts - -We could simply type all our code into the console, but that would require us to retype all our code every time we wish to run it. So we usually save code in a script file (with the .R extension). - -Make a new script: - -> Flie > New File > Rscript - -Or use the new script button on the toolbar. - -Save the script and give it a sensible name like `TDS-lesson-1.R` with: - -> File > Save - -Or the save button on the toolbar. 
- -### Writing Code - -Let's start with some basic R operations - -``` r -x <- 1:5 -y <- c(0,1,3,9,18) -plot(x, y) -``` - -This code creates two objects, both vectors of length == 5, and then plots them. - -### Running Code - -We have several ways to run code within a script. - -1. Place the cursor on a line of code and press `CTRL + Enter` to run that line of code. -2. Highlight a block of code or part of a line of code and press `CTRL + Enter` to run the highlighted code. -3. Press `CTRL + Shift + Enter` to run all the code in a script. -4. Press the Run button on the toolbar to run all the code in a script. -5. Use the function `source()` to run all the code in a script e.g. `source("TDS-lesson-1.R")` - -### Vewing Objects - -Lets create some different types of object: - -``` r -cat <- data.frame(name = c("Tiddles", "Chester", "Shadow"), - type = c("Tabby", "Persian", "Siamese"), - age = c(1, 3, 5), - likes_milk = c(TRUE, FALSE,TRUE)) -even_numbers <- seq(from = 2, to = 4000, by = 2) -random_letters <- sample(letters, size = 100, replace = TRUE) -small_matrix <- matrix(1:24, nrow = 12) -``` - -We can view the objects in a range of ways: - -1. Type the name of the object into the console e.g. `cat`, what happens if we try to view all 2000 even\_numbers? -2. Use the `head()` function to view the first few values e.g. `head(even_numbers)` -3. Use the view table button next to matrix or data.frame objects in the environment tab. - -We can also get an overview of an object using a range of functions. - -1. `summary()` -2. `class()` -3. `class()` -4. `dim()` -5. `length()` - -**Exercise** try these functions, what results do they give? - -### Using Autocomplete - -RStudio can help you write code by autocompleting. RStudio will look for similar objects and functions after typing the first three letters of a name. - - - -When there is more than one option you can select from the list using the mouse or arrow keys. 
- -Within a function you can get a list of arguments by pressing Tab. - - - -Notice the help popup. - -### Getting help - -Every function in R has a help page. You can view the help using `?` for example `?sum`. Many packages also contain vignettes, these are long form help documents containing examples and guides. `vignette()` will show a list of all the vignettes available, or you can show a specific vignette for example `vignette(topic = "sf1", package = "sf")`. - -### Commenting Code - -It is good practice to use comments in your code to explain what your code does. You can comment code using `#` - -For example: - -``` r -# A whole line comment -x <- 1:5 # An inline comment -y <- x * 2 -``` - -You can comment a whole block of text by selecting it and using CTRL + Shift + C - -You can add a comment section using CTRL + Shift + R - -### Cleaning your environment and removing objects - -The Environment tab shows all the objects in your environment, this includes Data, Values, and Functions. By default, new objects appear in the Global Environment but you can see other environments with the drop-down menu. For example, each package has its own environment. - -Sometimes you wish to remove things from your environment, perhaps because you no longer need them or things are getting cluttered. - -You can remove an object with the `rm()` function e.g. `rm(x)` or `rm(x,y)` or you can clear your whole environment with the broom button on the Environment Tab. - -### Debugging Code - -This code example will run, but we can see some of RStudio's debugging features by changing it. See that when the bracket is removed the red X and the underlying highlight the broken code. You may need to save the code you see the debugging prompt. - - - -**Always address debugging prompts before running your code** - -### Saving your work - -We have already seen that you can save an R script. You can also save R objects in the RDS format. 
- -``` r -saveRDS(cat,"cat.Rds") -``` - -We can also read back in our data. - -``` r -cat2 <- readRDS("cat.Rds") -identical(cat, cat2) -``` - -R also supports many other formats. For example CSV files. - -``` r -write.csv(cat, "cat.csv") -cat3 <- read.csv("cat.csv") -identical(cat3, cat1) -``` - -Notice that `cat3` and `cat` are not identical, what has changed? Hint: use `?write.csv`. - -### Subsetting - -We can subset any R object to just get part of the object. Subsetting can be done by either providing the positional numbers of the subset or logical vector of the same length. For two dimension object such as matrices and data.frames you can subset by row or column. Subsetting is done using square brackets `[]` after the name of an object. - -``` r -even_numbers[1:5] # Just the first five even_numbers -x[c(TRUE, FALSE,TRUE,FALSE,TRUE)] # The 1st, 3rd, and 5th element in x -cat[c(1,2),] # First and second row of cat -cat[,c(1,3)] # First and third column of cat -cat[,c("name","age")] # First and third column of cat by name -``` - -It is also possible to create logial vector for subsetting by creating a query - -``` r -x[x == 5] # Only when x == 5 (notice the use of double equals) -even_numbers[even_numbers < 50] # Just the even_numbers less than 50 -even_numbers[even_numbers %% 9 == 0] # Just the even_numbers that are a multiple of 9 -cat[cat$name == "Tiddles",] # THe rows where the name is Tiddles (notice the use of $) -``` - -### Dealing with NAs - -R object can have a value of NA. This is how R represents missing data. - -``` r -z <- c(4,5,NA,7) -``` - -NA values are common in real-world data but can cause trouble, for example - -``` r -sum(z) # Result is NA -``` - -Some functions can be told to ignore NA values. - -``` r -sum(z, na.rm = TRUE) # Result is equal to 4 + 5 + 7 -``` - -You can find NAs using the `is.na()` function, and then remove them - -``` r -is.na(z) -z_nona <- z[!is.na(z)] #Note the use of the not operator ! 
-sum(z) -``` - -Be careful of NAs especially in statistical analysis, for example, the average of a value excluding NAs may not be representative of the whole. - -### Packages - -R has lots of functionality built in, but the real value in R is the community of package developers. Packages add new functions to R. Some packages are so useful they have become almost essential while others are only used for specific purposes. - -There are two stages to using a package. - -#### Instaling a package. - -Packages that you don't have on your computer can be installed using `install.packages()` Packages come from [The Comprehensive R Archive Network](https://cran.r-project.org/) there are over 10,000 packages on CRAN. You only need to install a package once. - -**Note: it is bad practice to install packages within a script, as installing packages each time it runs risks corrupting the package** - -``` r -install.packages("sf") -``` - -Once you have a package on your computer you need to add it to your current environment - -``` r -library(sf) -``` - -It is good practice to shart your script by loading any packages that are required. - -There will be lots more about packages in the next session. 
- -Now you are ready to use R --------------------------- - -``` r -eyes <- c(2.3,4,3.7,4) -eyes <- matrix(eyes, ncol = 2, byrow = T) -mouth <- c(2,2,2.5,1.3,3,1,3.5,1.3,4,2) -mouth <- matrix(mouth, ncol = 2, byrow = T) -plot(eyes, - type = "p", - main = "Smile you're using R", - cex = 2, - xlim = c(0,5), - ylim = c(0,5)) -lines(mouth, - type = "l", - col = "red") -``` - -![](01_-_How_to_use_R_files/figure-markdown_github/smile-1.png) diff --git a/courses/2day/01_-_How_to_use_R_files/figure-markdown_github/smile-1.png b/courses/2day/01_-_How_to_use_R_files/figure-markdown_github/smile-1.png deleted file mode 100644 index 8784caf..0000000 Binary files a/courses/2day/01_-_How_to_use_R_files/figure-markdown_github/smile-1.png and /dev/null differ diff --git a/courses/2day/03 - Spatial data analysis.Rmd b/courses/2day/03 - Spatial data analysis.Rmd deleted file mode 100644 index d18188c..0000000 --- a/courses/2day/03 - Spatial data analysis.Rmd +++ /dev/null @@ -1,157 +0,0 @@ ---- -title: "Spatial data analysis" -output: github_document ---- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) -``` - -## Reading and Writing Spatial Data - -Reading and Writing spatial data is done with the `st_read()` and `st_write()` functions. For example: - -```{r setup, eval=FALSE} -library(sf) -vector_filepath <- system.file("shapes/world.gpkg", package = "spData") -world <- st_read(vector_filepath) -st_write(world, "world.geojson") -``` - -The sf supports most spatial data formats, you can see a list using `st_drivers()`. - -For more information on reading and writing see [Chapter 7 of Geocompuation with R](https://geocompr.robinlovelace.net/read-write.html) - -**Exercises:** - -1. Try reading in one of your own datasets and converting it to a different format - - - - -## Projections and Coordinate Reference Systems - -When plotting a map you need X and Y coordinates to specify where objects should appear. 
While this is simple on a flat surface spatial data must fit onto the curved surface of the earth. You may know that it is impossible to unwrap a sphere into a single flat surface without distorting (stretching, twisting, cutting) the surface in some way. The process of making a flat map from a curved Earth is known as projection, and there are many valid ways to project a map. - -Cartographers can argue intensely about their preferred projections as this famous [XKCD comic](https://xkcd.com/977/) alludes to. Coordinate Reference Systems (CRS) refer to different ways of defining the X and Y coordinates used in different projections. Largely they fall into two categories: - -* Geographical Coordinate Systems: use latitude and longitude to represent any place on the Earth - -* Projected Coordinate Systems: use distances from an origin point to represent a small part of the Earth, e.g. a country. The advantage of a projects CRS is that it is easier to calculate properties such as distance and area as coordinates are in metres. - -You can find a catalogue of different CRSs at http://spatialreference.org/ - -CRSs are often referred to by the EPSG number. The European Petroleum Survey Group publish a database of different coordinate systems. Two useful projections to commit to memory are: - -* 4326 - the World Geodetic System 1984 which is a widely used geographical coordinate system, used in GPS datasets and the .geojson file format, for example. -* 27700 - the British National Grid - -Every `sf data.frame` has a CRS. 
- -```{r, eval=FALSE} -st_crs(nz) # 2193 the CRS for New Zealand Transverse Mercator -nz_latlng <- st_transform(nz, 4326) # Transfrom from one CRS to another -st_crs(nz) # 4326 the CRS for World Geodetic System 1984 -nz_latlng <- st_transform(nz, 2193) # Transfrom back -``` -**Warning** It is possible to change the CRS without reprojecting the data by: -```{r, eval=FALSE} -st_crs(nz) <- 4326 -``` - -This is risky as you may confuse you data by having the wrong CRS. - -For more infroamtion see [Chapter 6](https://geocompr.robinlovelace.net/reproj-geo-data.html) of Geocompuation with R. - -## Geometry Types - -Spatial data can have different types of geometry, the most common are POINT LINESTRING and POLYGON. You may also encounder their MULTI variants e.g. MULTIPOLYGON. The difference is the number of geometires allowed per row of the data.frame. For example the UK is made up of many islands, so can't be reprsented with a single polygon, but could be repesendted with many polygons as part of a MULTIPOLYGON. - -**Exercises:** - -1. Check the geometry type of nz using `st_geometry_type()` -1. Change the geometry from MULTIPOLYGON to POLYGON using `st_cast()`, what has changed? Does this invalidate any of the attributes? - -## Spatial Interactions - -It is possible to subset a `sf data.frame` by location as well as attributes. - -Lets load the locations of some mountains in New Zeland and then find which ones are within the `nz_large` areas. - -```{r, eval=TRUE} -nz_height <- spData::nz_height # Load the nz_height data -nz_height2 <- nz_height[canterbury,] -plot(nz$geom) -plot(nz_height, col = "black", add = TRUE) -plot(nz_height2, col = "red", add = TRUE) -``` - -By default the `st_intersects` fucntion use usesd to decide which rows to keep. - -```{r, eval=FALSE} -st_intersects(nz_height, nz_large) -``` -There are many different types of spatial interaction. You can see a list of them in the help. 
- -```{r, eval=FALSE} -?st_intersects -``` - -You could use a differnt fucntion by adding the `op` argument - -```{r, eval=TRUE} -nz_height3 <- nz_height[canterbury, , op = st_disjoint] -``` - -## Aggregation - -With a normal data.frame it is possible to group and aggregate variaibles using the `dplyr` packages. - -> group_by() %>% summarise() - -It is also possible to do this for sf `data.frames` iby default a `st_union` is performed on the geometries. - -```{r, eval=TRUE} -nz_islands <- nz %>% - group_by(Island) %>% - summarise(Population = sum(Population)) -plot(nz_islands) -``` - -Note that the implicit `st_union` has resolved all the internal boundaries of each island. If you wished to keep the boundaries you can use `st_combine`. - -```{r, eval=TRUE} -nz_islands <- nz %>% - group_by(Island) %>% - summarise(Population = sum(Population), do_union = FALSE) -plot(nz_islands) -``` - -**Exercises:** - -1. How many of these high points does the Canterbury region contain? -1. Which region has the second highest number of nz_height points in, and how many does it have? -1. Generalizing the question to all regions: how many of New Zealand’s 16 regions contain points which belong to the top 100 highest points in the country? Which regions? -Bonus: create a table listing these regions in order of the number of points and their name - - -## Geometric Operations - -Geometric operation change or derive from the geometry of our data. The most commonly used functions are: - -- `st_simplify` To simplify a complex shape -- `st_centroid` To find the geographical center of a shape -- `st_buffer` To creat a buffer around a shape - -For more see [Section 5.2 of Geocompuation with R](https://geocompr.robinlovelace.net/geometric-operations.html#geo-vec) - -**Exercises:** - -1. Create 10, 20, 30 km buffers around the moountains in New Zealand -1. Find the centroids of each region of New Zealand -1. Simplify the boundaries of New Zealand - -**Bonus Exercises:** - -1. 
See how the buffering and simplification changes when you change the CRS of the datasets. Hint: Try 4326 and 2193 - diff --git a/courses/2day/07 - Local Routing.Rmd b/courses/2day/07 - Local Routing.Rmd deleted file mode 100644 index 0e0072f..0000000 --- a/courses/2day/07 - Local Routing.Rmd +++ /dev/null @@ -1,215 +0,0 @@ ---- -title: "Local route network analysis" -output: html_document ---- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) -``` - -## Routing from A to B - -Routing is the process of finding the "shortest" path from A to B. In this context shortest does not just mean in distance, it may be in time (quickest), or some other characteristic e.g. safest, quietest. - -There are many packages that enable you to do routing in R. When choosing a package you should consider several characteristics: - -### Local or Remote - -Some packages can do local routing on your own computer. While others allow you to connect to a service. - -**Local Routing** - -* Usually requires more effort to set up -* No cost (except for time and hardware) -* Control over data, custom scenarios possible futures etc - -**Remote Routing** - -* Easy setup -* May charge or limit the number of routes -* May support more complex options e.g. traffic, public transport -* Usually limited to routing in the present e.g. current road network current transport timetables. - -### Routing Features - -Not all routing services can do all types of routing, or do them equally well. Most do driving directions but consider if they do: - -* Walking / Cycling (if so does it include specialist road types, exclude dangerous roads) -* Take account of hilliness -* Public Transport (if so does it include fares, which types?) -* Are public transport routes based on timetables or real-time service status? -* Take account of steps and disabled access -* Support specialist vehicles (e.g. lorries and low bridges) -* Does it support real-time or historical traffic data? 
- -## Routing packages for R - -A non-comprehensive list of routing packages for R - -## Packages on CRAN - -* [googleway](https://cran.r-project.org/web/packages/googleway/) support for Google Maps and Directions -* [mapsapi](https://cran.rstudio.com/web/packages/mapsapi/) alternative for google maps -* [osrmr](https://cran.r-project.org/web/packages/osrmr/) Open Source Routing Machine, can connect to remote -* [CycleStreets](https://cran.r-project.org/web/packages/cyclestreets) Specialist cycling routing, used by -* [dodgr](https://cran.r-project.org/web/packages/dodgr) Routing done in R -* [igraph](https://cran.r-project.org/web/packages/igraph) General network analysis, not transport specific -* [stplanr](https://cran.r-project.org/web/packages/stplanr) Limited routing functions based on dodgr and igraph, and some other services. -* [gtfsrouter](https://cran.r-project.org/web/packages/gtfsrouter/index.html) For integrating GTFS public transport timetables - -## Packages on GitHub - -* [Open Route Service](https://giscience.github.io/openrouteservice-r/) Connect to ORS website -* [TransportAPI](https://github.com/mem48/transportAPI) An ITS Leeds Package, in development -* [OpenTripPlanner](https://github.com/ITSLeeds/opentripplanner) An ITS Leeds Package, local or remote OTP routing -* [graphhopper](https://github.com/graphhopper/directions-api-clients/tree/master/r) - - -## Getting some Routes - -Many services require you to sign up for a free API key, to save some time we will use the TransportAPI package that allows a small amount of routing to be done without an API key. 
- -**NOTE** If you want to do more routing please signup for a free API key at https://developer.transportapi.com/signup - -We will install the packages - -```{r, eval=FALSE} -# Install packages from GitHub -devtools::install_github("ITSleeds/transportAPI") -devtools::install_github("ITSleeds/opentripplanner") # For the bonus exercises -``` - -And load the packages - -```{r, eval=TRUE, message=FALSE, warning=FALSE} -# Load packages -library(sf) -library(stplanr) -library(tmap) -library(transportAPI) -library(opentripplanner) -library(pct) -library(dplyr) -``` - -TransportAPI you to signup for a [free API key](https://developer.transportapi.com/signup). - -```{r, eval=FALSE} -usethis::edit_r_environ() -# TRANSPORTAPI_app_id=your_id_here -# TRANSPORTAPI_app_key=your_key_here -``` - -Now we will get some data from the [PCT](http://www.pct.bike). The `get_pct_lines` function returns the desire lines from the PCT. - -```{r, eval=TRUE} -lines <- pct::get_pct_lines("isle-of-wight", "commute","lsoa") -lines <- lines[,c("id","geo_code1","geo_code2","all", - "bicycle","car_driver","train_tube","bus")] -centroids <- pct::get_pct_centroids("isle-of-wight", "commute","lsoa") -centroids <- centroids[,"geo_code"] -plot(lines$geometry, lwd = lines$all / mean(lines$all)) -plot(centroids, col = "red", add = T) -``` - -Now we will take the top 3 desire lines and route them though Transport API. First, we will subset the top lines and create from and to coordinates - -```{r, eval=TRUE} -lines_top <- top_n(lines, 10, all) -from <- centroids[match(lines_top$geo_code1, centroids$geo_code),] -to <- centroids[match(lines_top$geo_code2, centroids$geo_code),] -``` - -Then we will use the `tapi_journey_batch` to find multiple routes at once (note: the numbers used are not real, add your own keys). 
- -```{r, eval=FALSE} -Sys.setenv(TRANSPORTAPI_app_id = "7e8661c5") -Sys.setenv(TRANSPORTAPI_app_key = "ce106381f6e5787f223e720b6055d4f8") -routes_car <- transportAPI::tapi_journey_batch(from$geometry, - to$geometry, - fromid = from$geo_code, - toid = to$geo_code, - apitype = "car") -tmap_mode("view") -tm_shape(routes_car) + - tm_lines() -``` - -```{r, echo=FALSE, eval=FALSE} -sf::write_sf(routes_car, "routes_car.geojson") -piggyback::pb_upload("routes_car.geojson") -piggyback::pb_download_url() -``` - -If you cannot get this to work with your own API key, you can download the pre-generated file as follows: - -```{r} -u = "https://github.com/ITSLeeds/TDS/releases/download/0.2/routes_car.geojson" -routes_car = sf::read_sf(u) -``` - -We need to join the number of commuters onto the geometry of the routes. - -```{r, eval=TRUE} -st_geometry(lines_top) <- NULL -routes_car <- left_join(routes_car, lines_top, by = c("fromid" = "geo_code1", "toid" = "geo_code2")) -``` - -Finally, we can combine the routes into a route network - -```{r, eval=TRUE, message=FALSE} -rnet <- overline2(routes_car, attrib = c("all","bicycle","car_driver","train_tube","bus")) -tm_shape(rnet) + - tm_lines(col = "car_driver", lwd = 3) -``` - -**Exercises:** - -1. Get routes for the top 5 desire lines for different modes e.g. car, public transport, bike -1. How are public transport routes different to car and bike routes? -1. Plot these routes together on a map, where are there complementary and conflicting routes? - - - - - - -## Local Routing With Open Trip Planner - -We will repeat the analysis using a local routing tool. This tutorial is based on the [packgee vignette](https://itsleeds.github.io/opentripplanner/articles/opentripplanner.html) - -First, we need some basic data. 
- -```{r, eval=FALSE} -# Create Folders for Data -dir.create("OTP") -path_data <- file.path("OTP") -path_otp <- file.path(path_data, "otp.jar") -dir.create(file.path(path_data,"graphs")) # create a folder structure for the data -dir.create(file.path(path_data,"graphs","default")) - -# Download OTP and Data -download.file(url = "https://repo1.maven.org/maven2/org/opentripplanner/otp/1.3.0/otp-1.3.0-shaded.jar", - destfile = path_otp, mode = "wb") -download.file("https://github.com/ITSLeeds/opentripplanner/releases/download/0.1/isle-of-wight-demo.zip", - destfile = file.path(path_data,"isle-of-wight-demo.zip") , mode="wb") -unzip(file.path(path_data,"isle-of-wight-demo.zip"), exdir = file.path(path_data,"graphs","default")) -unlink(file.path(path_data,"isle-of-wight-demo.zip")) -``` - -Now we set up the OTP - -```{r, eval=FALSE} -log <- otp_build_graph(otp = path_otp, dir = path_data) -otp_setup(otp = path_otp, dir = path_data) -otpcon <- otp_connect() -``` - -Next, we find the routes - -```{r, eval=FALSE} -routes_driving <- otp_plan(otpcon, fromPlace = from, toPlace = to, - mode = "CAR") -``` - - diff --git a/courses/2day/07_-_Local_Routing.md b/courses/2day/07_-_Local_Routing.md deleted file mode 100644 index be5ee49..0000000 --- a/courses/2day/07_-_Local_Routing.md +++ /dev/null @@ -1,88 +0,0 @@ -Local route network analysis -================ - -Routing from A to B -------------------- - -Routing is the process of finding the "shortest" path from A to B. In this context shortest does not just mean in distance, it may be in time (quickest), or some other characteristic e.g. safest, quietest. - -There are many packages that enable you to do routing in R. When choosing a package you should consider seveal characteritics: - -### Local or Remote - -Some packages can do local routing on your own computer. While other allow you to connect to a service. 
- -**Local Routing** - -- Usually requires more effort to set up -- No cost (except for time and hardware) -- Control over data, custom scenarios possible futures etc - -\*\* Remote Routing \*\* - -- Easy setup -- May charge or limit the number of routes -- May support more complex options e.g. traffic, public transport -- Usually limited to routing in the present e.g. current road network current transpor timetables. - -### Routing Features - -Not all routing services can do all types of routing, or do them equally well. Most do driving directions but consider if they do: - -- Walking / Cycling (if so does it include specalist road types, exclude dangerour roads) -- Take account of hilliness -- Public Transport (if so does it include fares, which types?) -- Are public transport routes based on timetables or realtime service status? -- Take account of steps and disabled access -- Support specialist vehicles (e.g. lorrys and low bridges) -- Does it support realtime or historic traffic data? - -Routing packages for R ----------------------- - -A non-comprehesive list of routing packages for R - -Packages on CRAN ----------------- - -- [googleway](https://cran.r-project.org/web/packages/googleway/) support for Google Maps and Directions -- [mapsapi](https://cran.rstudio.com/web/packages/mapsapi/) alternative for google maps -- [osrmr](https://cran.r-project.org/web/packages/osrmr/) Open Source Routing Machine, can connect to remote \* [CycleStreets](https://cran.r-project.org/web/packages/cyclestreets) Specalist cycling routing, used by -- [dodgr](https://cran.r-project.org/web/packages/dodgr) Routing done in R -- [igraph](https://cran.r-project.org/web/packages/igraph) General network analysis, not transport specific -- [stplanr](https://cran.r-project.org/web/packages/stplanr) Limited routing functions based on dodgr and igraph, and some other services. 
-- [gtfsrouter](https://cran.r-project.org/web/packages/gtfsrouter/index.html) For integrating GTFS public transport timetables - -Packages on GitHub ------------------- - -- [Open Route Service](https://giscience.github.io/openrouteservice-r/) Connect to ORS website -- [TransportAPI](https://github.com/mem48/transportAPI) An ITS Leeds Package, in development -- [OpenTripPlanner](https://github.com/ITSLeeds/opentripplanner) An ITS Leeds Package, local or remote OTP routing -- [graphhopper](https://github.com/graphhopper/directions-api-clients/tree/master/r) - -Including Code --------------- - -You can include R code in the document as follows: - -``` r -summary(cars) -``` - - ## speed dist - ## Min. : 4.0 Min. : 2.00 - ## 1st Qu.:12.0 1st Qu.: 26.00 - ## Median :15.0 Median : 36.00 - ## Mean :15.4 Mean : 42.98 - ## 3rd Qu.:19.0 3rd Qu.: 56.00 - ## Max. :25.0 Max. :120.00 - -Including Plots ---------------- - -You can also embed plots, for example: - -![](07_-_Local_Routing_files/figure-markdown_github/pressure-1.png) - -Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot. diff --git a/courses/2day/images/autocomplete.jpg b/courses/2day/images/autocomplete.jpg deleted file mode 100644 index c82fbbb..0000000 Binary files a/courses/2day/images/autocomplete.jpg and /dev/null differ diff --git a/courses/analysis-4hr.Rmd b/courses/analysis-4hr.Rmd deleted file mode 100644 index 0446d32..0000000 --- a/courses/analysis-4hr.Rmd +++ /dev/null @@ -1,317 +0,0 @@ ---- -title: "4 hour workshop: R for Transport Data Analysis" -output: github_document -bibliography: - - ../tds.bib - - ../software.bib ---- - -Transport Data Science course, based on materials hosted at https://github.com/ITSLeeds/TDS - -Home of this course: git.io/tds4hr - -Assumed prior knowledge: - -- Working knowledge of R, e.g. 
have completed: - - Introduction to R free DataCamp course: https://www.datacamp.com/courses/free-introduction-to-r - - Recommended reading: Section 4.2 of *Efficient R Programming* [@gillespie_efficient_2016]: https://csgillespie.github.io/efficientR/efficient-workflow.html#package-selection - -- Some knowledge of tidyverse is highly recommended, e.g. already know the contents of or have read-up on **and tried examples from** the following 'cheatsheets': - - Tidyverse for Beginners DataCamp Cheat Sheet (see [here](https://s3.amazonaws.com/assets.datacamp.com/blog_assets/Tidyverse+Cheat+Sheet.pdf)). **At a minimum you will have completed up to Chapter 5 on this online course** (this may take ~2 hours) - - Data wrangling with dplyr and tidyr RStudio cheet cheet (see [here](https://www.rstudio.com/wp-content/uploads/2015/02/data-wrangling-cheatsheet.pdf)) - - - -## Computing Requirements - -- Desktops with recent versions of R and RStudio installed, including: - - R 3.6.0 or later recommended (although any R 3.5 release should work): **download and install the latest version from** https://cran.r-project.org/ - - A recent version of RStudio: **download and install the latest version from** https://www.rstudio.com/products/rstudio/download/ - - Up-to-date versions of **tidyverse** and **sf** packages, at a minimum - - Other packages we'll use are shown in the code below, which installs them - -The packages used can be installed as follows: - -```{r, eval=FALSE} -install.packages("remotes") -pkgs = c( - "osmdata", - "pct", - "sf", - "stats19", - "stplanr", - "tidyverse", - "tmap" -) -remotes::install_cran(pkgs) -``` - -- Own laptops should be used, unless the necessary packages work on the cluster - -- Data: all data will be either provided by packages or downloaded on the day (assuming good internet) - - A test to check if data downloads work is accessing stats19 data (check this works): - -```{r, eval=FALSE} -crashes = stats19::get_stats19(year = 2017, type = 
"accidents", ask = FALSE) -``` - -- To test you can download data from OSM, try the following code: - -```{r} -# set-up -library(osmdata) -library(tmap) -ttm() - -# get data -d = opq("madrid") %>% - add_osm_feature("highway", "cycleway") %>% - osmdata_sf() - -# plot data -tm_shape(d$osm_lines) + - tm_lines(col = "blue", lwd = 5) + - tm_view(basemaps = leaflet::providers$OpenStreetMap.BlackAndWhite) -``` - - -## Venue and course times - -The course will take place from 10:00 to 14:00 at [Edificio Multiusos Ciudad Universitaria](https://www.google.com/maps/place/Edificio+Multiusos/@40.4500279,-3.7331934,15z/data=!4m5!3m4!1s0x0:0x721fc321c370faad!8m2!3d40.4500279!4d-3.7331934), 28040 Madrid - -See https://forms.gle/m5zraC8QQH5AydgT8 - - - -## Required reading - -The course will build on Chapter 12 of *Geocomputation with R* [@lovelace_geocomputation_2019]: http://geocompr.robinlovelace.net/transport.html - -## Optional extra reading - -- Optional extra reading: for context, you may want to read-up on: - - Overview of GIS in R, e.g. 
in Chapter 1 of *Geocomputation with R* or this blog post: https://www.jessesadler.com/post/gis-with-r-intro/ - - stplanr: A package for transport planning [@lovelace_stplanr:_2018] - - R for data science [@grolemund_r_2016] - -### Agenda - -- 10:00 - 11:00: Introduction and getting started with R - -- 11:00 - 12:00: Getting and analysing transport data: examples with `stats19`, `pct` and `osmdata` packages - -12:00 - 12:30: Break - -- 12:30 - 13:00: Origin-destination (OD) data analysis with `stplanr` - -- 13:00 - 14:00: From routes to route networks and data and methods for assessing cycling potential - -## Exercises - -## Getting started - -See section 1 of the exercises: https://git.io/tds2dayex - - -```{r, out.width="30%", fig.show='hold', message=FALSE} -d = data.frame(x = 1:9, y = (1:9)^2) -plot(d) # base R -library(ggplot2) # with a package -ggplot(d) + - geom_point(aes(x, y)) -``` - -## Getting tranport data - -Starting point: run the following commands. - -```{r} -library(tidyverse) -library(stats19) -library(pct) -regions = pct::pct_regions -iow = regions %>% - filter(region_name == "isle-of-wight") -plot(iow) -vehicles = get_stats19(year = 2017, type = "vehicles", ask = FALSE) -``` - - - -1. Download and plot all crashes reported in Great Britain in 2017 (hint: see [the stats19 vignette](https://cran.r-project.org/web/packages/stats19/vignettes/stats19.html)) -1. Find the function in the `stats19` package that converts a `data.frame` object into an `sf` data frame. Use this function to convert the road crashes into an `sf` object, called `crashes_sf`, for example. -1. Filter crashes that happened in the Isle of Wight based on attribute data (hint: the relevant column contains the word `local`) -1. Filter crashes happened in the Isle of Wight using geographic subsetting (hint: remember `st_crs()`?) - -## Origin-destination data with stplanr - -The starting point of these exercises is to work with the Origin-Destination data in the `stplanr` package. 
- -```{r} -library(stplanr) -head(flow) -plot(flow$On.foot, flow$Bicycle) -cor(flow$On.foot, flow$Bicycle) -plot(zones_sf) -l = od2line(flow, zones_sf) -plot(l) -``` - -- Plot the map interactively using mapview -- Highlight the desire line along which most people walk - -You can analyse OD data in more detail as follows. - -```{r} -l1 = l %>% - top_n(1, Bicycle) -plot(l1) -``` - - - - - -### Exercises - -1. Create an object representing desire lines in Isle of Wight, e.g, with: `desire_lines_all = pct::get_pct_lines(region = "isle-of-wight")` -1. Subset the desire lines with a value of `all` of 200 or above -1. Create a buffer of 500 m for each desire line and calculate the number of crashes that happened within each (using STATS19 data downloaded in the previous exercise) -1. Create a faceted plot showing the temporal distribution of crashes in Isle of Wight (you can choose whether to show these over the months of the year, over days of the week, or over the hours of a day) -1. Do a spatiotemporal subset to identify the crashes that happened within the most commonly travelled desire line between 07:00 and 10:00 during weekdays. 
- - -```{r, eval=FALSE} - -u = "https://www.openstreetmap.org/trace/2992569/data" -download.file(u, "track.gpx") -sf::st_layers("track.gpx") - -tracks = sf::st_read("track.gpx", layer= "track_points") -# ?st_read -mapview::mapview(tracks) - - -library(stplanr) -library(tidyverse) -f = flow[1:3, 1:5] -f_tidy = flow %>% - select(1:5) %>% - slice(1:3) -f -class(f) -z = zones_sf -l = od2line(f, z) -plot(l[2, ]) -nrow(l) -r = line2route(l, route_fun = route_osrm) -plot(r) - -# get data from PCT -od = pct::get_od() -od -class(od) -iow = pct::get_pct_centroids("isle-of-wight") -plot(iow) -mapview::mapview(iow) - -od_iow = od %>% - filter(geo_code1 %in% iow$geo_code) %>% - filter(geo_code2 %in% iow$geo_code) - -l_iow = od2line(od_iow, iow) -mapview::mapview(l_iow) - -l_iow = l_iow %>% - mutate(percent_drive = car_driver / all * 100) %>% - mutate(distance_euclidean = sf::st_length(l_iow)) %>% - mutate(distance_euclidean = as.numeric(distance_euclidean)) -l_iow$percent_drive -class(l_iow$distance_euclidean) - -summary(l_iow$percent_drive) -summary(l_iow$distance_euclidean) - -l_iow_car_dependent = l_iow %>% - filter(percent_drive > 75) %>% - filter(distance_euclidean < 10000) - -mapview::mapview(l_iow_car_dependent) - -r_fast = line2route(l_iow_car_dependent) -r = line2route(l_iow_car_dependent, plan = "quietest") - -mapview::mapview(r) + - mapview::mapview(r_fast) - -r_fast$bicycle = l_iow_car_dependent$bicycle - -iow_rnet = overline(r_fast, "bicycle") -plot(iow_rnet) - -# git.io/tds2dayex - -# travel patterns in/around complutense -library(osmdata) -library(tidyverse) -complu = opq(bbox = "Madrid") %>% - add_osm_feature(key = "name", value = "Complutense", value_exact = FALSE) %>% - osmdata_sf() - -complu - -complu_poly = complu$osm_polygons -mapview::mapview(complu_poly) -complu1 = complu_poly %>% - mutate(area = as.numeric(sf::st_area(.))) %>% - top_n(1, area) -mapview::mapview(complu1) - -# get 1 km buffer -complu_buffer = complu1 %>% - st_transform(3087) %>% - 
st_buffer(1000) %>% - st_transform(4326) - -mapview::mapview(complu_buffer) -b = sf::st_bbox(complu_buffer) - -# s = dodgr::dodgr_streetnet(bbox = b) -osm = opq(bbox = b) %>% - add_osm_feature("highway") %>% - osmdata_sf() - -mapview::mapview(osm$osm_lines) - -rnet = SpatialLinesNetwork(osm$osm_lines) - -rnet@sl$centrality = igraph::edge_betweenness(rnet@g) - -plot(rnet@sl["centrality"]) - -library(tmap) -ttm() -tm_shape(rnet@sl) + - tm_lines(lwd = "centrality", scale = 9, col = "highway") - -# try cyclstreets in Madrid - -spain = rnaturalearth::ne_(scale = 10, country = "Spain", returnclass = "sf") -names(spain) - -sf::st_geometry_type(spain) - -sf::write_sf(spain, "/tmp/spain.gpkg") - -u = "http://www.ine.es/censos2011_datos/cartografia_censo2011_nacional.zip" -download.file(u, "data.zip") -unzip("data.zip") -spain = sf::read_sf("SECC_CPV_E_20111101_01_R_INE.shp") -spain_sub = spain %>% - filter(CUSECCUMUN == "01001") -mapview::mapview(spain[1:100, ]) -``` - - -## References - diff --git a/courses/analysis-4hr.md b/courses/analysis-4hr.md deleted file mode 100644 index fa573e8..0000000 --- a/courses/analysis-4hr.md +++ /dev/null @@ -1,128 +0,0 @@ -4 hour workshop: R for Transport Data Analysis -================ - -Transport Data Science course, based on materials hosted at - -Assumed prior knowledge: - -- Working knowledge of R, e.g. have completed: -- Introduction to R free DataCamp course: -- Recommended reading: Section 4.2 of *Efficient R Programming* (Gillespie and Lovelace 2016): - -- Some knowledge of tidyverse is highly recommended, e.g. already know the contents of or have read-up on **and tried examples from** the following 'cheatsheets': -- Tidyverse for Beginners DataCamp Cheat Sheet (see [here](https://s3.amazonaws.com/assets.datacamp.com/blog_assets/Tidyverse+Cheat+Sheet.pdf)). 
**At a minimum you will have completed up to Chapter 5 on this online course** (this may take ~2 hours) -- Data wrangling with dplyr and tidyr RStudio cheet cheet (see [here](https://www.rstudio.com/wp-content/uploads/2015/02/data-wrangling-cheatsheet.pdf)) - - -Computing Requirements ----------------------- - -- Desktops with recent versions of R and RStudio installed, including: -- R 3.6.0 or later recommended (although any R 3.5 release should work): **download and install the latest version from** -- A recent version of RStudio: **download and install the latest version from** -- Up-to-date versions of **tidyverse** and **sf** packages, at a minimum -- Other packages we'll use are shown in the code below, which installs them - -The packages used can be installed as follows: - -``` r -install.packages("remotes") -pkgs = c( - "osmdata", - "pct", - "sf", - "stats19", - "stplanr", - "tidyverse", - "tmap" -) -remotes::install_cran(pkgs) -``` - -- Own laptops should be used, unless the necessary packages work on the cluster - -- Data: all data will be either provided by packages or downloaded on the day (assuming good internet) -- A test to check if data downloads work is accessing stats19 data (check this works): - -``` r -crashes = stats19::get_stats19(year = 2017, type = "accidents", ask = FALSE) -``` - -- To test you can download data from OSM, try the following code: - -``` r -# set-up -library(osmdata) -``` - - ## Data (c) OpenStreetMap contributors, ODbL 1.0. 
http://www.openstreetmap.org/copyright - -``` r -library(tmap) -ttm() -``` - - ## tmap mode set to interactive viewing - -``` r -# get data -d = opq("madrid") %>% - add_osm_feature("highway", "cycleway") %>% - osmdata_sf() - -# plot data -tm_shape(d$osm_lines) + - tm_lines(col = "blue", lwd = 5) + - tm_view(basemaps = leaflet::providers$OpenStreetMap.BlackAndWhite) -``` - - ## Linking to GEOS 3.5.1, GDAL 2.1.2, PROJ 4.9.3 - -![](analysis-4hr_files/figure-markdown_github/unnamed-chunk-3-1.png) - -Venue and course times ----------------------- - -The course will take place from 10:00 to 14:00 at [Edificio Multiusos Ciudad Universitaria](https://www.google.com/maps/place/Edificio+Multiusos/@40.4500279,-3.7331934,15z/data=!4m5!3m4!1s0x0:0x721fc321c370faad!8m2!3d40.4500279!4d-3.7331934), 28040 Madrid - -See - - -Required reading ----------------- - -The course will build on Chapter 12 of *Geocomputation with R* (Lovelace, Nowosad, and Meunchow 2019): - -Optional extra reading ----------------------- - -- Optional extra reading: for context, you may want to read-up on: -- Overview of GIS in R, e.g. in Chapter 1 of *Geocomputation with R* or this blog post: -- stplanr: A package for transport planning (Lovelace and Ellison 2018) -- R for data science (Grolemund and Wickham 2016) - -### Agenda - -- 10:00 - 11:00: Introduction and getting started with R - -- 11:00 - 12:00: Getting and analysing spatio-temporal transport: examples with `stats19`, `pct` and `osmdata` packages - -12:00 - 12:30: Break - -- 12:30 - 13:00: Origin-destination (OD) data analysis with `stplanr` - -- 13:00 - 14:00: From routes to route networks and data and methods for assessing cycling potential - -Exercises ---------- - -References ----------- - -Gillespie, Colin, and Robin Lovelace. 2016. *Efficient R Programming: A Practical Guide to Smarter Programming*. O’Reilly Media. . - -Grolemund, Garrett, and Hadley Wickham. 2016. *R for Data Science*. 1 edition. O’Reilly Media. 
- -Lovelace, Robin, and Richard Ellison. 2018. “Stplanr: A Package for Transport Planning.” *The R Journal* 10 (2): 7–23. doi:[10.32614/RJ-2018-053](https://doi.org/10.32614/RJ-2018-053). - -Lovelace, Robin, Jakub Nowosad, and Jannes Meunchow. 2019. *Geocomputation with R*. CRC Press. . diff --git a/courses/analysis-4hr_files/figure-gfm/unnamed-chunk-4-1.png b/courses/analysis-4hr_files/figure-gfm/unnamed-chunk-4-1.png deleted file mode 100644 index ad2e7ad..0000000 Binary files a/courses/analysis-4hr_files/figure-gfm/unnamed-chunk-4-1.png and /dev/null differ diff --git a/courses/analysis-4hr_files/figure-gfm/unnamed-chunk-4-2.png b/courses/analysis-4hr_files/figure-gfm/unnamed-chunk-4-2.png deleted file mode 100644 index 2512bb5..0000000 Binary files a/courses/analysis-4hr_files/figure-gfm/unnamed-chunk-4-2.png and /dev/null differ diff --git a/courses/analysis-4hr_files/figure-gfm/unnamed-chunk-6-1.png b/courses/analysis-4hr_files/figure-gfm/unnamed-chunk-6-1.png deleted file mode 100644 index 6bb8e28..0000000 Binary files a/courses/analysis-4hr_files/figure-gfm/unnamed-chunk-6-1.png and /dev/null differ diff --git a/courses/libs/Proj4Leaflet/proj4leaflet.js b/courses/libs/Proj4Leaflet/proj4leaflet.js deleted file mode 100644 index 5ce8d47..0000000 --- a/courses/libs/Proj4Leaflet/proj4leaflet.js +++ /dev/null @@ -1,272 +0,0 @@ -(function (factory) { - var L, proj4; - if (typeof define === 'function' && define.amd) { - // AMD - define(['leaflet', 'proj4'], factory); - } else if (typeof module === 'object' && typeof module.exports === "object") { - // Node/CommonJS - L = require('leaflet'); - proj4 = require('proj4'); - module.exports = factory(L, proj4); - } else { - // Browser globals - if (typeof window.L === 'undefined' || typeof window.proj4 === 'undefined') - throw 'Leaflet and proj4 must be loaded first'; - factory(window.L, window.proj4); - } -}(function (L, proj4) { - if (proj4.__esModule && proj4.default) { - // If proj4 was bundled as an ES6 module, 
unwrap it to get - // to the actual main proj4 object. - // See discussion in https://github.com/kartena/Proj4Leaflet/pull/147 - proj4 = proj4.default; - } - - L.Proj = {}; - - L.Proj._isProj4Obj = function(a) { - return (typeof a.inverse !== 'undefined' && - typeof a.forward !== 'undefined'); - }; - - L.Proj.Projection = L.Class.extend({ - initialize: function(code, def, bounds) { - var isP4 = L.Proj._isProj4Obj(code); - this._proj = isP4 ? code : this._projFromCodeDef(code, def); - this.bounds = isP4 ? def : bounds; - }, - - project: function (latlng) { - var point = this._proj.forward([latlng.lng, latlng.lat]); - return new L.Point(point[0], point[1]); - }, - - unproject: function (point, unbounded) { - var point2 = this._proj.inverse([point.x, point.y]); - return new L.LatLng(point2[1], point2[0], unbounded); - }, - - _projFromCodeDef: function(code, def) { - if (def) { - proj4.defs(code, def); - } else if (proj4.defs[code] === undefined) { - var urn = code.split(':'); - if (urn.length > 3) { - code = urn[urn.length - 3] + ':' + urn[urn.length - 1]; - } - if (proj4.defs[code] === undefined) { - throw 'No projection definition for code ' + code; - } - } - - return proj4(code); - } - }); - - L.Proj.CRS = L.Class.extend({ - includes: L.CRS, - - options: { - transformation: new L.Transformation(1, 0, -1, 0) - }, - - initialize: function(a, b, c) { - var code, - proj, - def, - options; - - if (L.Proj._isProj4Obj(a)) { - proj = a; - code = proj.srsCode; - options = b || {}; - - this.projection = new L.Proj.Projection(proj, options.bounds); - } else { - code = a; - def = b; - options = c || {}; - this.projection = new L.Proj.Projection(code, def, options.bounds); - } - - L.Util.setOptions(this, options); - this.code = code; - this.transformation = this.options.transformation; - - if (this.options.origin) { - this.transformation = - new L.Transformation(1, -this.options.origin[0], - -1, this.options.origin[1]); - } - - if (this.options.scales) { - this._scales = 
this.options.scales; - } else if (this.options.resolutions) { - this._scales = []; - for (var i = this.options.resolutions.length - 1; i >= 0; i--) { - if (this.options.resolutions[i]) { - this._scales[i] = 1 / this.options.resolutions[i]; - } - } - } - - this.infinite = !this.options.bounds; - - }, - - scale: function(zoom) { - var iZoom = Math.floor(zoom), - baseScale, - nextScale, - scaleDiff, - zDiff; - if (zoom === iZoom) { - return this._scales[zoom]; - } else { - // Non-integer zoom, interpolate - baseScale = this._scales[iZoom]; - nextScale = this._scales[iZoom + 1]; - scaleDiff = nextScale - baseScale; - zDiff = (zoom - iZoom); - return baseScale + scaleDiff * zDiff; - } - }, - - zoom: function(scale) { - // Find closest number in this._scales, down - var downScale = this._closestElement(this._scales, scale), - downZoom = this._scales.indexOf(downScale), - nextScale, - nextZoom, - scaleDiff; - // Check if scale is downScale => return array index - if (scale === downScale) { - return downZoom; - } - if (downScale === undefined) { - return -Infinity; - } - // Interpolate - nextZoom = downZoom + 1; - nextScale = this._scales[nextZoom]; - if (nextScale === undefined) { - return Infinity; - } - scaleDiff = nextScale - downScale; - return (scale - downScale) / scaleDiff + downZoom; - }, - - distance: L.CRS.Earth.distance, - - R: L.CRS.Earth.R, - - /* Get the closest lowest element in an array */ - _closestElement: function(array, element) { - var low; - for (var i = array.length; i--;) { - if (array[i] <= element && (low === undefined || low < array[i])) { - low = array[i]; - } - } - return low; - } - }); - - L.Proj.GeoJSON = L.GeoJSON.extend({ - initialize: function(geojson, options) { - this._callLevel = 0; - L.GeoJSON.prototype.initialize.call(this, geojson, options); - }, - - addData: function(geojson) { - var crs; - - if (geojson) { - if (geojson.crs && geojson.crs.type === 'name') { - crs = new L.Proj.CRS(geojson.crs.properties.name); - } else if 
(geojson.crs && geojson.crs.type) { - crs = new L.Proj.CRS(geojson.crs.type + ':' + geojson.crs.properties.code); - } - - if (crs !== undefined) { - this.options.coordsToLatLng = function(coords) { - var point = L.point(coords[0], coords[1]); - return crs.projection.unproject(point); - }; - } - } - - // Base class' addData might call us recursively, but - // CRS shouldn't be cleared in that case, since CRS applies - // to the whole GeoJSON, inluding sub-features. - this._callLevel++; - try { - L.GeoJSON.prototype.addData.call(this, geojson); - } finally { - this._callLevel--; - if (this._callLevel === 0) { - delete this.options.coordsToLatLng; - } - } - } - }); - - L.Proj.geoJson = function(geojson, options) { - return new L.Proj.GeoJSON(geojson, options); - }; - - L.Proj.ImageOverlay = L.ImageOverlay.extend({ - initialize: function (url, bounds, options) { - L.ImageOverlay.prototype.initialize.call(this, url, null, options); - this._projectedBounds = bounds; - }, - - // Danger ahead: Overriding internal methods in Leaflet. - // Decided to do this rather than making a copy of L.ImageOverlay - // and doing very tiny modifications to it. - // Future will tell if this was wise or not. 
- _animateZoom: function (event) { - var scale = this._map.getZoomScale(event.zoom); - var northWest = L.point(this._projectedBounds.min.x, this._projectedBounds.max.y); - var offset = this._projectedToNewLayerPoint(northWest, event.zoom, event.center); - - L.DomUtil.setTransform(this._image, offset, scale); - }, - - _reset: function () { - var zoom = this._map.getZoom(); - var pixelOrigin = this._map.getPixelOrigin(); - var bounds = L.bounds( - this._transform(this._projectedBounds.min, zoom)._subtract(pixelOrigin), - this._transform(this._projectedBounds.max, zoom)._subtract(pixelOrigin) - ); - var size = bounds.getSize(); - - L.DomUtil.setPosition(this._image, bounds.min); - this._image.style.width = size.x + 'px'; - this._image.style.height = size.y + 'px'; - }, - - _projectedToNewLayerPoint: function (point, zoom, center) { - var viewHalf = this._map.getSize()._divideBy(2); - var newTopLeft = this._map.project(center, zoom)._subtract(viewHalf)._round(); - var topLeft = newTopLeft.add(this._map._getMapPanePos()); - - return this._transform(point, zoom)._subtract(topLeft); - }, - - _transform: function (point, zoom) { - var crs = this._map.options.crs; - var transformation = crs.transformation; - var scale = crs.scale(zoom); - - return transformation.transform(point, scale); - } - }); - - L.Proj.imageOverlay = function (url, bounds, options) { - return new L.Proj.ImageOverlay(url, bounds, options); - }; - - return L.Proj; -})); diff --git a/courses/libs/leaflet-providers-plugin/leaflet-providers-plugin.js b/courses/libs/leaflet-providers-plugin/leaflet-providers-plugin.js deleted file mode 100644 index 82cd630..0000000 --- a/courses/libs/leaflet-providers-plugin/leaflet-providers-plugin.js +++ /dev/null @@ -1,3 +0,0 @@ -LeafletWidget.methods.addProviderTiles = function(provider, layerId, group, options) { - this.layerManager.addLayer(L.tileLayer.provider(provider, options), "tile", layerId, group); -}; diff --git a/courses/libs/leaflet-providers/package.json 
b/courses/libs/leaflet-providers/package.json deleted file mode 100644 index 9a71ca5..0000000 --- a/courses/libs/leaflet-providers/package.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "name": "leaflet-providers", - "version": "1.1.17", - "description": "An extension to Leaflet that contains configurations for various free tile providers.", - "main": "leaflet-providers.js", - "repository": { - "type": "git", - "url": "git://github.com/leaflet-extras/leaflet-providers.git" - }, - "scripts": { - "test": "npm run lint && npm run testsuite", - "testsuite": "phantomjs ./node_modules/mocha-phantomjs-core/mocha-phantomjs-core.js tests/index.html", - "lint": "eslint --config .eslintrc leaflet-providers.js index.html preview/*.js preview/*.html tests/*", - "min": "uglifyjs leaflet-providers.js -mc -o leaflet-providers.min.js", - "release": "mversion patch -m" - }, - "license": "BSD-2-Clause", - "bugs": { - "url": "https://github.com/leaflet-extras/leaflet-providers/issues" - }, - "files": [ - "leaflet-providers.js", - "README.md", - "CHANGELOG.md", - "licence.md" - ], - "devDependencies": { - "chai": "^4.1.2", - "eslint": "^3.16.1", - "eslint-plugin-html": "^2.0.1", - "mocha": "^3.2.0", - "mocha-phantomjs-core": "^2.1.1", - "mversion": "^2.0.0", - "phantomjs-prebuilt": "^2.1.16", - "uglify-js": "^2.4.15" - }, - "autoupdate": { - "source": "git", - "target": "git://github.com/leaflet-extras/leaflet-providers.git", - "basePath": "/", - "files": [ - "leaflet-providers.js" - ] - } -} diff --git a/courses/libs/leaflet-providers/rstudio_install.md b/courses/libs/leaflet-providers/rstudio_install.md deleted file mode 100644 index 1775fd9..0000000 --- a/courses/libs/leaflet-providers/rstudio_install.md +++ /dev/null @@ -1,19 +0,0 @@ -# Location -* from: github.com/schloerke/leaflet-providers@urlProtocol - -* Inspiration taken from https://github.com/leaflet-extras/leaflet-providers/commit/dea786a3219f9cc824b8e96903a17f46ca9a5afc to use the 'old' relative url protocols and to 'upgrade' 
them at js runtime. - - - -# Notes... - -* Copy/paste provider information into `providers.json` -```js -var providers = L.TileLayer.Provider.providers; -JSON.stringify(providers, null, " "); -``` - * `./data-raw/providerNames.R` was re-ran to update to the latest providers - -* Some providers had their protocols turned into '//'. - * This allows browsers to pick the protocol - * To stop files from the protocols staying as files, a ducktape patch was applied to `L.TileLayer.prototype.initialize` and `L.TileLayer.WMS.prototype.initialize` diff --git a/courses/libs/leaflet/images/layers-2x.png b/courses/libs/leaflet/images/layers-2x.png deleted file mode 100644 index 200c333..0000000 Binary files a/courses/libs/leaflet/images/layers-2x.png and /dev/null differ diff --git a/courses/libs/leaflet/images/layers.png b/courses/libs/leaflet/images/layers.png deleted file mode 100644 index 1a72e57..0000000 Binary files a/courses/libs/leaflet/images/layers.png and /dev/null differ diff --git a/courses/libs/leaflet/images/marker-icon-2x.png b/courses/libs/leaflet/images/marker-icon-2x.png deleted file mode 100644 index 88f9e50..0000000 Binary files a/courses/libs/leaflet/images/marker-icon-2x.png and /dev/null differ diff --git a/courses/libs/leaflet/images/marker-icon.png b/courses/libs/leaflet/images/marker-icon.png deleted file mode 100644 index 950edf2..0000000 Binary files a/courses/libs/leaflet/images/marker-icon.png and /dev/null differ diff --git a/courses/libs/leaflet/images/marker-shadow.png b/courses/libs/leaflet/images/marker-shadow.png deleted file mode 100644 index 9fd2979..0000000 Binary files a/courses/libs/leaflet/images/marker-shadow.png and /dev/null differ diff --git a/courses/libs/leafletfix/leafletfix.css b/courses/libs/leafletfix/leafletfix.css deleted file mode 100644 index 3ae60ff..0000000 --- a/courses/libs/leafletfix/leafletfix.css +++ /dev/null @@ -1,36 +0,0 @@ -/* Work around CSS properties introduced on img by bootstrap */ -img.leaflet-tile { - 
padding: 0; - margin: 0; - border-radius: 0; - border: none; -} -.info { - padding: 6px 8px; - font: 14px/16px Arial, Helvetica, sans-serif; - background: white; - background: rgba(255,255,255,0.8); - box-shadow: 0 0 15px rgba(0,0,0,0.2); - border-radius: 5px; -} -.legend { - line-height: 18px; - color: #555; -} -.legend svg text { - fill: #555; -} -.legend svg line { - stroke: #555; -} -.legend i { - width: 18px; - height: 18px; - margin-right: 4px; - opacity: 0.7; - display: inline-block; - vertical-align: top; - /*For IE 7*/ - zoom: 1; - *display: inline; -} diff --git a/courses/libs/remark-css/default-fonts.css b/courses/libs/remark-css/default-fonts.css deleted file mode 100644 index 8d035fa..0000000 --- a/courses/libs/remark-css/default-fonts.css +++ /dev/null @@ -1,10 +0,0 @@ -@import url(https://fonts.googleapis.com/css?family=Yanone+Kaffeesatz); -@import url(https://fonts.googleapis.com/css?family=Droid+Serif:400,700,400italic); -@import url(https://fonts.googleapis.com/css?family=Source+Code+Pro:400,700); - -body { font-family: 'Droid Serif', 'Palatino Linotype', 'Book Antiqua', Palatino, 'Microsoft YaHei', 'Songti SC', serif; } -h1, h2, h3 { - font-family: 'Yanone Kaffeesatz'; - font-weight: normal; -} -.remark-code, .remark-inline-code { font-family: 'Source Code Pro', 'Lucida Console', Monaco, monospace; } diff --git a/courses/libs/remark-css/default.css b/courses/libs/remark-css/default.css deleted file mode 100644 index cb9fc34..0000000 --- a/courses/libs/remark-css/default.css +++ /dev/null @@ -1,72 +0,0 @@ -a, a > code { - color: rgb(249, 38, 114); - text-decoration: none; -} -.footnote { - position: absolute; - bottom: 3em; - padding-right: 4em; - font-size: 90%; -} -.remark-code-line-highlighted { background-color: #ffff88; } - -.inverse { - background-color: #272822; - color: #d6d6d6; - text-shadow: 0 0 20px #333; -} -.inverse h1, .inverse h2, .inverse h3 { - color: #f3f3f3; -} -/* Two-column layout */ -.left-column { - color: #777; - width: 
20%; - height: 92%; - float: left; -} -.left-column h2:last-of-type, .left-column h3:last-child { - color: #000; -} -.right-column { - width: 75%; - float: right; - padding-top: 1em; -} -.pull-left { - float: left; - width: 47%; -} -.pull-right { - float: right; - width: 47%; -} -.pull-right ~ * { - clear: both; -} -img, video, iframe { - max-width: 100%; -} -blockquote { - border-left: solid 5px lightgray; - padding-left: 1em; -} -.remark-slide table { - margin: auto; - border-top: 1px solid #666; - border-bottom: 1px solid #666; -} -.remark-slide table thead th { border-bottom: 1px solid #ddd; } -th, td { padding: 5px; } -.remark-slide thead, .remark-slide tfoot, .remark-slide tr:nth-child(even) { background: #eee } - -@page { margin: 0; } -@media print { - .remark-slide-scaler { - width: 100% !important; - height: 100% !important; - transform: scale(1) !important; - top: 0 !important; - left: 0 !important; - } -} diff --git a/courses/libs/rstudio_leaflet/images/1px.png b/courses/libs/rstudio_leaflet/images/1px.png deleted file mode 100644 index 9da19ea..0000000 Binary files a/courses/libs/rstudio_leaflet/images/1px.png and /dev/null differ diff --git a/courses/libs/rstudio_leaflet/rstudio_leaflet.css b/courses/libs/rstudio_leaflet/rstudio_leaflet.css deleted file mode 100644 index 6fc5b20..0000000 --- a/courses/libs/rstudio_leaflet/rstudio_leaflet.css +++ /dev/null @@ -1,32 +0,0 @@ -.leaflet-tooltip.leaflet-tooltip-text-only, -.leaflet-tooltip.leaflet-tooltip-text-only:before, -.leaflet-tooltip.leaflet-tooltip-text-only:after { - background: none; - border: none; - box-shadow: none; -} - -.leaflet-tooltip.leaflet-tooltip-text-only.leaflet-tooltip-left { - margin-left: 5px; -} - -.leaflet-tooltip.leaflet-tooltip-text-only.leaflet-tooltip-right { - margin-left: -5px; -} - -.leaflet-tooltip:after { - border-right: 6px solid transparent; - /* right: -16px; */ -} - -.leaflet-popup-pane .leaflet-popup-tip-container { - /* when the tooltip container is clicked, it is 
closed */ - pointer-events: all; - /* tooltips should display the "hand" icon, just like .leaflet-interactive*/ - cursor: pointer; -} - -/* have the widget be displayed in the right 'layer' */ -.leaflet-map-pane { - z-index: auto; -} diff --git a/courses/tds-oneday.Rmd b/courses/tds-oneday.Rmd deleted file mode 100644 index 340f20a..0000000 --- a/courses/tds-oneday.Rmd +++ /dev/null @@ -1,197 +0,0 @@ ---- -output: - # word_document: default - github_document: default - # html_document: default ---- - -# Transport Data Science with R - -## Dates and prices - -Next on: 5^th^ April. - - -Delegate type | Price (£ early bird) | Price (£) ---------------|:---------------------|:--------- -Private sector| 400 | 450 -Public sector | 300 | 350 -Student | 200 | 250 - -For booking, see here: https://store.leeds.ac.uk/conferences-and-events/environment/geography/transport-data-science-with-r-2019 - - - -## Overview - -This course teaches two skill-sets that are fundamental in modern transport research: programming and data analytics. Combining these enables powerful transport planning and analysis workflows for tackling a wide range of problems, including: - -- How to find, download and import a range of transport datasets? -- How to develop automated and reproducible transport planning workflows? -- How can increasingly available datasets on air quality, traffic and active travel be used to inform policy? -- How to visualise results in an attractive and potentially on-line and interactive manner? - -This course will provide tools, code, data and, above all, face-to-face teaching to answer these questions and more, with the statistical programming language R. -The data science approach opens a world of possibilities for generating insight from your transport datasets. -The for researchers in the public sector, academia and industry. 
- - -## Learning objectives - -By the end of the course, you will be able to: - -- Find, download and import a variety of transport datasets, including from OpenStreetMap and government data portals -- Work with, analyse and model transport data with spatial, temporal and demographic attributes -- Work with air polution data in R and compare with transport behaviours -- Generate and analyse route networks for transport planning with reference to: - - Origin-destination (OD) data - - Geographic desire lines - - Route allocation using different routing services - - Route network generation and analysis - -## Pre-requisites and location - -Prior experience with transport datasets a pre-requisite for the course. -Attendees are expected to: - -- Be comfortable with the use of R, using it for everyday data analysis tasks - - **Important: you should have completed the [Introduction to R DataCamp Course](https://www.datacamp.com/courses/free-introduction-to-r), especially if you are not an experienced R user** - - **You should ensure your R installation is up-to-date and be able to reproduce the map below** -- Have experience with transport datasets and understand their structure (you will be familiar with the contents of the [Transport chapter](https://geocompr.robinlovelace.net/transport.html) in Geocomputation with R) - -Participants are expected to brush-up on their knowledge before the course, for example by completing the exercises linked-to in the bullet points above. - -In terms of **software**, with RStudio installed will be available for course attendees. -The following packages should be installed prior to attending the course: -However, for maximum benefit, **we recommend participants bring their own laptops**, with a recent version of R installed (3.5.0 or later). 
-Steps to set-up a suitable R/RStudio environment are described in sections [2.3](https://csgillespie.github.io/efficientR/set-up.html#r-version) and [2.5](https://csgillespie.github.io/efficientR/set-up.html#rstudio) of the book [Efficient R Programming](https://csgillespie.github.io/efficientR/set-up.html). - -```{r, eval=FALSE} -install.packages(c( - "openair", - "osmdata", - "sf", - "stplanr", - "tidyverse", - "tmap" -)) -``` - -You can test your computer is ready for the course by **running the commands below**. -The resulting interactive map shows a good route (in light blue in the image below) from Leeds station to the Worsley Building (the course takes place on Level 11 of the building): - -```{r, message=FALSE, warning=FALSE, eval=FALSE} -library(osmdata) -library(sf) -library(tmap) -tmap_mode("view") -location = opq("leeds") %>% - add_osm_feature(key = "name", value = "Worsley Building") %>% - osmdata_sf() -route = sf::read_sf("https://git.io/fhnAr") -tm_shape(route) + - tm_lines(col = "blue", lwd = 7, alpha = 0.4) + - tm_shape(location$osm_polygons) + - tm_polygons(col = "red") + - tm_view(basemaps = leaflet::providers$OpenStreetMap) -``` - -```{r, echo=FALSE} -knitr::include_graphics("https://raw.githubusercontent.com/ITSLeeds/TDS/master/courses/tds-oneday_files/figure-gfm/unnamed-chunk-2-1.png") -``` - - -## Background reading - -It would be useful if participants could acquaint themselves with the following resources. - -- Efficient R Programming: (ERP for short, with section numbers linked e.g. ERP 1.5.2) is a book and online resource (at [csgillespie.github.io/efficientR](https://csgillespie.github.io/efficientR)) on using R effectively (Gillespie and Lovelace 2016). 
- -- Introducing stplanr: an introductory vignette on **stplanr**, accessible via the following command (assuming **stplanr** is installed): - -```{r, eval=FALSE} -vignette("introducing-stplanr") -``` - -- R for Data Science (R4DS): A book and online resource we use to teach R objects (also of wider insterest): http://r4ds.had.co.nz - -## Agenda - -- Registration and refreshments (09:00 - 09:20) -- Getting set-up in the cluster (09:20 - 09:30) - -- Finding, downloading, importing transport data (09:30 - 11:00) - - An overview of data portals - - Origin-destination data - - OpenStreetMap data - - Other data sources - -**11:00 - 11:10 Coffee break** - -- Working with spatio-temporal data (11:10 - 12:30) - - Introduction to STATS19 - - Temporal analysis - - Spatial analysis - - Analysis and modelling - -**LUNCH: 12:30 - 13:30** - -- Traffic data and pollution analysis with R (13:30 - 15:30, delivered by [Dr James Tate](http://www.its.leeds.ac.uk/people/j.tate)) - - An introduction to the **openair** package - - Traffic count data - - Meteorological data - - Air pollution data: daily, weekly and seasonal variability - - Visualising air pollution data and next steps - -**15:30 - 15:45 Refreshments** - -- From desire lines to route networks (15:45 - 16:45) - - Handling OD data - - Creating 'desire lines' from OD and zone data - - Route allocation and route network creation - - Route network analysis (comparing with other datasets) - -- Discussion and applying the methods to your data (16:00 onwards) - - -### Course tutors - -**[Robin Lovelace](http://robinlovelace.net/)** is a researcher at the Leeds Institute for Transport Studies ([ITS](http://www.its.leeds.ac.uk/)) and the Leeds Institute for Data Analytics ([LIDA](http://lida.leeds.ac.uk/about-lida/contact/)). -Robin has many years of experience of using R for academic -research and has taught numerous R courses at all levels. 
-He has developed popular R resources including -the popular books [Efficient R Programming](http://shop.oreilly.com/product/0636920047995.do) (Gillespie and Lovelace 2016), -[Spatial Microsimulation with R](https://github.com/Robinlovelace/spatial-microsim-book) (Lovelace and Dumont 2016), and -[Geocomputation with R](https://geocompr.robinlovelace.net/) (Lovelace et al. 2019). - -These skills have been applied on a number of projects with real-world applications, including the [Propensity to Cycle Tool](http://www.pct.bike/), a nationally scalable interactive online mapping -application, and the [**stplanr**](https://github.com/ropensci/stplanr) package. - -**[James Tate](http://www.its.leeds.ac.uk/people/j.tate)** is a vehicle emissions and air quality expert focussing on the impacts of road transport on the environment. He has developed and deployed new approaches to survey and model the emission performance of the UK/ EU road transport fleet. James has been using R as the primary tool in his data analysis workflow for a decade and has developed popular modules teaching R to Master's students in ITS. - -### Further information & how to book - -The course will be held in the Leeds Institute for Data Analytics (see [lida.leeds.ac.uk/about-lida/contact/](http://lida.leeds.ac.uk/about-lida/contact/) for details and a map). - -The course is open to ITS Masters and PhD students, and external delegates. The fee includes learning materials, lunch and refreshments during the course, but not overnight accommodation. The course is also available as bespoke or in-company training. - -Course bookings and other enquiries can be made via a booking form available from [http://www.its.leeds.ac.uk/courses/cpd/how-to-book/](its.leeds.ac.uk/courses/cpd/how-to-book/). - -For enquiries please contact the Short Courses Co-ordinator: - -Email: Kylie Norman - -### References - -Lovelace, Robin, and Morgane Dumont. 2016. Spatial Microsimulation with R. 
Available from [CRC Press](https://www.crcpress.com/Spatial-Microsimulation-with-R/Lovelace-Dumont/9781498711548). - -Gillespie, C., Lovelace, R., 2016. Efficient R Programming: A Practical Guide to Smarter Programming. Available from [O’Reilly Media](http://shop.oreilly.com/product/0636920047995.do). - -Lovelace, R., Nowosad, J., Muenchow, J., 2019. Geocomputation with R. CRC Press. - - - - - - - diff --git a/courses/tds-oneday.md b/courses/tds-oneday.md deleted file mode 100644 index 599e1f9..0000000 --- a/courses/tds-oneday.md +++ /dev/null @@ -1,174 +0,0 @@ - -Transport Data Science with R -============================= - -Dates and prices ----------------- - -Next on: 5th April. - -| Delegate type | Price (£ early bird) | Price (£) | -|----------------|:---------------------|:----------| -| Private sector | 400 | 450 | -| Public sector | 300 | 350 | -| Student | 200 | 250 | - -For booking, see here: - - -Overview --------- - -This course teaches two skill-sets that are fundamental in modern transport research: programming and data analytics. Combining these enables powerful transport planning and analysis workflows for tackling a wide range of problems, including: - -- How to find, download and import a range of transport datasets? -- How to develop automated and reproducible transport planning workflows? -- How can increasingly available datasets on air quality, traffic and active travel be used to inform policy? -- How to visualise results in an attractive and potentially on-line and interactive manner? - -This course will provide tools, code, data and, above all, face-to-face teaching to answer these questions and more, with the statistical programming language R. The data science approach opens a world of possibilities for generating insight from your transport datasets. The for researchers in the public sector, academia and industry. 
- -Learning objectives -------------------- - -By the end of the course, you will be able to: - -- Find, download and import a variety of transport datasets, including from OpenStreetMap and government data portals -- Work with, analyse and model transport data with spatial, temporal and demographic attributes -- Work with air polution data in R and compare with transport behaviours -- Generate and analyse route networks for transport planning with reference to: - - Origin-destination (OD) data - - Geographic desire lines - - Route allocation using different routing services - - Route network generation and analysis - -Pre-requisites and location ---------------------------- - -Prior experience with transport datasets a pre-requisite for the course. Attendees are expected to: - -- Be comfortable with the use of R, using it for everyday data analysis tasks - - **Important: you should have completed the [Introduction to R DataCamp Course](https://www.datacamp.com/courses/free-introduction-to-r), especially if you are not an experienced R user** - - **You should ensure your R installation is up-to-date and be able to reproduce the map below** -- Have experience with transport datasets and understand their structure (you will be familiar with the contents of the [Transport chapter](https://geocompr.robinlovelace.net/transport.html) in Geocomputation with R) - -Participants are expected to brush-up on their knowledge before the course, for example by completing the exercises linked-to in the bullet points above. - -In terms of **software**, with RStudio installed will be available for course attendees. The following packages should be installed prior to attending the course: However, for maximum benefit, **we recommend participants bring their own laptops**, with a recent version of R installed (3.5.0 or later). 
Steps to set-up a suitable R/RStudio environment are described in sections [2.3](https://csgillespie.github.io/efficientR/set-up.html#r-version) and [2.5](https://csgillespie.github.io/efficientR/set-up.html#rstudio) of the book [Efficient R Programming](https://csgillespie.github.io/efficientR/set-up.html). - -``` r -install.packages(c( - "openair", - "osmdata", - "sf", - "stplanr", - "tidyverse", - "tmap" -)) -``` - -You can test your computer is ready for the course by **running the commands below**. The resulting interactive map shows a good route (in light blue in the image below) from Leeds station to the Worsley Building (the course takes place on Level 11 of the building): - -``` r -library(osmdata) -library(sf) -library(tmap) -tmap_mode("view") -location = opq("leeds") %>% - add_osm_feature(key = "name", value = "Worsley Building") %>% - osmdata_sf() -route = sf::read_sf("https://git.io/fhnAr") -tm_shape(route) + - tm_lines(col = "blue", lwd = 7, alpha = 0.4) + - tm_shape(location$osm_polygons) + - tm_polygons(col = "red") + - tm_view(basemaps = leaflet::providers$OpenStreetMap) -``` - -![](https://raw.githubusercontent.com/ITSLeeds/TDS/master/courses/tds-oneday_files/figure-gfm/unnamed-chunk-2-1.png) - -Background reading ------------------- - -It would be useful if participants could acquaint themselves with the following resources. - -- Efficient R Programming: (ERP for short, with section numbers linked e.g. ERP 1.5.2) is a book and online resource (at [csgillespie.github.io/efficientR](https://csgillespie.github.io/efficientR)) on using R effectively (Gillespie and Lovelace 2016). 
- -- Introducing stplanr: an introductory vignette on **stplanr**, accessible via the following command (assuming **stplanr** is installed): - -``` r -vignette("introducing-stplanr") -``` - -- R for Data Science (R4DS): A book and online resource we use to teach R objects (also of wider insterest): - -Agenda ------- - -- Registration and refreshments (09:00 - 09:20) -- Getting set-up in the cluster (09:20 - 09:30) - -- Finding, downloading, importing transport data (09:30 - 11:00) - - An overview of data portals - - Origin-destination data - - OpenStreetMap data - - Other data sources - -**11:00 - 11:10 Coffee break** - -- Working with spatio-temporal data (11:10 - 12:30) - - Introduction to STATS19 - - Temporal analysis - - Spatial analysis - - Analysis and modelling - -**LUNCH: 12:30 - 13:30** - -- Traffic data and pollution analysis with R (13:30 - 15:30, delivered by [Dr James Tate](http://www.its.leeds.ac.uk/people/j.tate)) - - An introduction to the **openair** package - - Traffic count data - - Meteorological data - - Air pollution data: daily, weekly and seasonal variability - - Visualising air pollution data and next steps - -**15:30 - 15:45 Refreshments** - -- From desire lines to route networks (15:45 - 16:45) - - Handling OD data - - Creating 'desire lines' from OD and zone data - - Route allocation and route network creation - - Route network analysis (comparing with other datasets) -- Discussion and applying the methods to your data (16:00 onwards) - -### Course tutors - -**[Robin Lovelace](http://robinlovelace.net/)** is a researcher at the Leeds Institute for Transport Studies ([ITS](http://www.its.leeds.ac.uk/)) and the Leeds Institute for Data Analytics ([LIDA](http://lida.leeds.ac.uk/about-lida/contact/)). Robin has many years of experience of using R for academic research and has taught numerous R courses at all levels. 
He has developed popular R resources including the popular books [Efficient R Programming](http://shop.oreilly.com/product/0636920047995.do) (Gillespie and Lovelace 2016), [Spatial Microsimulation with R](https://github.com/Robinlovelace/spatial-microsim-book) (Lovelace and Dumont 2016), and [Geocomputation with R](https://geocompr.robinlovelace.net/) (Lovelace et al. 2019). - -These skills have been applied on a number of projects with real-world applications, including the [Propensity to Cycle Tool](http://www.pct.bike/), a nationally scalable interactive online mapping application, and the [**stplanr**](https://github.com/ropensci/stplanr) package. - -**[James Tate](http://www.its.leeds.ac.uk/people/j.tate)** is a vehicle emissions and air quality expert focussing on the impacts of road transport on the environment. He has developed and deployed new approaches to survey and model the emission performance of the UK/ EU road transport fleet. James has been using R as the primary tool in his data analysis workflow for a decade and has developed popular modules teaching R to Master's students in ITS. - -### Further information & how to book - -The course will be held in the Leeds Institute for Data Analytics (see [lida.leeds.ac.uk/about-lida/contact/](http://lida.leeds.ac.uk/about-lida/contact/) for details and a map). - -The course is open to ITS Masters and PhD students, and external delegates. The fee includes learning materials, lunch and refreshments during the course, but not overnight accommodation. The course is also available as bespoke or in-company training. - -Course bookings and other enquiries can be made via a booking form available from [http://www.its.leeds.ac.uk/courses/cpd/how-to-book/](its.leeds.ac.uk/courses/cpd/how-to-book/). - -For enquiries please contact the Short Courses Co-ordinator: - -Email: Kylie Norman - -### References - -Lovelace, Robin, and Morgane Dumont. 2016. Spatial Microsimulation with R. 
Available from [CRC Press](https://www.crcpress.com/Spatial-Microsimulation-with-R/Lovelace-Dumont/9781498711548). - -Gillespie, C., Lovelace, R., 2016. Efficient R Programming: A Practical Guide to Smarter Programming. Available from [O’Reilly Media](http://shop.oreilly.com/product/0636920047995.do). - -Lovelace, R., Nowosad, J., Muenchow, J., 2019. Geocomputation with R. CRC Press. - - - - - diff --git a/courses/training-setup-message.Rmd b/courses/training-setup-message.Rmd deleted file mode 100644 index 2275e2a..0000000 --- a/courses/training-setup-message.Rmd +++ /dev/null @@ -1,102 +0,0 @@ ---- -title: "An introduction to road safety analysis with R: setup notes" -output: github_document -bibliography: - - references.bib - - ../tds.bib ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>", - out.width = "50%" -) -``` - -These are prerequisites for the upcoming course on 4th December, 9AM. - -The course is located in the EC Stone Cluster 6.68, which is a 5 minute walk from ITS - see here for directions: https://it.leeds.ac.uk/it?id=kb_article&sysparm_article=KB0011752 - -**Note that it takes time to startup your laptop or login to the cluster computers so please arrive by 09:00 prompt.** - - -## Installing RStudio - -**Your computer should also have the necessary software installed before the session.** - -You are expected to have **RStudio properly installed on your computer** or **to have tested RStudio on a cluster computer** before the R workshop on 4th December. -I encourage you to install R and RStudio, and bring your own laptop for the course because the cluster computers are slow, and you will have more control over your R setup that way. -To learn how to install R and RStudio, see [here](https://courses.edx.org/courses/UTAustinX/UT.7.01x/3T2014/56c5437b88fa43cf828bff5371c6a924/). 
- -**Test that your RStudio installation works before the course** by opening the RStudio, pasting the following line of code into the console on the bottom left of RStudio and pressing Enter: - -```r -source("https://git.io/JeaZH") -``` - -If you see a figure appear on the right of the screen, congratulations, your RStudio installation is ready to go. -If not, copy and paste the following code into the console: - -```{r, eval=FALSE} -install.packages("remotes") -pkgs = c( - "pct", # package for getting travel data in the UK - "sf", # spatial data package - "stats19", # downloads and formats open stats19 crash data - "stplanr", # for working with origin-destination and route data - "tidyverse", # a package for user friendly data science - "tmap" # for making maps -) -remotes::install_cran(pkgs) -# remotes::install_github("ITSLeeds/pct") -``` - -## Prior reading - -### Esential - -To read up on R, we recommend reading Chapter 1 Getting Started with Data in R of the online book Statistical Inference via Data Science, which can be found here: https://moderndive.netlify.com/1-getting-started.html - -Reading sections 1.1 to 1.3 of that book and trying a few of the examples are considered **essential prerequisites**, unless you are already experienced with R. - -### Optional - -Optionally, if you want a more interactive learning environment, you can try getting started with the free [DataCamp](https://www.datacamp.com/courses/free-introduction-to-r) course. -Other good resources can be found at [education.rstudio.com/learn](https://education.rstudio.com/learn/beginner/). - -And for more information on how R can be used for transport research, the Transportation chapter of Geocomputation with R [@lovelace_geocomputation_2019] is a good place to start: https://geocompr.robinlovelace.net/transport.html - -A more detailed resource on R for transport planning is @lovelace_stplanr:_2018. - -For an introduction to data science with R, see @grolemund_r_2016:1. 
- -## Another test of R and RStudio - -As another test, try running the following commands from RStudio (which should result in a map): - - - -```{r message=FALSE, eval=FALSE} -library(stats19) -library(tidyverse) -library(tmap) # installed alongside mapview -crashes = get_stats19(year = 2017, type = "ac") -crashes_iow = crashes %>% - filter(local_authority_district == "Isle of Wight") %>% - format_sf() - -# basic plot -plot(crashes_iow) -``` - -You should see results like those shown in the map here: https://github.com/ropensci/stats19/issues/105 - -If you cannot create that map by running the code above before the course, get in touch with us, e.g. by writing a comment under that github issue page (Note: You will need a github account). - -For an online version of these instructions see here: https://github.com/ITSLeeds/TDS/blob/master/courses/training-setup-message.md - - -## References - - diff --git a/courses/training-setup-message.md b/courses/training-setup-message.md deleted file mode 100644 index 3307bc3..0000000 --- a/courses/training-setup-message.md +++ /dev/null @@ -1,141 +0,0 @@ -An introduction to road safety analysis with R: setup notes -================ - -These are prerequisites for the upcoming course on 4th December, 9AM. - -The course is located in the EC Stone Cluster 6.68, which is a 5 minute -walk from ITS - see here for directions: - - -**Note that it takes time to startup your laptop or login to the cluster -computers so please arrive by 09:00 prompt.** - -## Installing RStudio - -**Your computer should also have the necessary software installed before -the session.** - -You are expected to have **RStudio properly installed on your computer** -or **to have tested RStudio on a cluster computer** before the R -workshop on 4th December. I encourage you to install R and RStudio, and -bring your own laptop for the course because the cluster computers are -slow, and you will have more control over your R setup that way. 
To -learn how to install R and RStudio, see -[here](https://courses.edx.org/courses/UTAustinX/UT.7.01x/3T2014/56c5437b88fa43cf828bff5371c6a924/). - -**Test that your RStudio installation works before the course** by -opening the RStudio, pasting the following line of code into the console -on the bottom left of RStudio and pressing Enter: - -``` r -source("https://git.io/JeaZH") -``` - -If you see a figure appear on the right of the screen, congratulations, -your RStudio installation is ready to go. If not, copy and paste the -following code into the console: - -``` r -install.packages("remotes") -pkgs = c( - "pct", # package for getting travel data in the UK - "sf", # spatial data package - "stats19", # downloads and formats open stats19 crash data - "stplanr", # for working with origin-destination and route data - "tidyverse", # a package for user friendly data science - "tmap" # for making maps -) -remotes::install_cran(pkgs) -# remotes::install_github("ITSLeeds/pct") -``` - -## Prior reading - -### Esential - -To read up on R, we recommend reading Chapter 1 Getting Started with -Data in R of the online book Statistical Inference via Data Science, -which can be found here: - - -Reading sections 1.1 to 1.3 of that book and trying a few of the -examples are considered **essential prerequisites**, unless you are -already experienced with R. - -### Optional - -Optionally, if you want a more interactive learning environment, you can -try getting started with the free -[DataCamp](https://www.datacamp.com/courses/free-introduction-to-r) -course. Other good resources can be found at -[education.rstudio.com/learn](https://education.rstudio.com/learn/beginner/). - -And for more information on how R can be used for transport research, -the Transportation chapter of Geocomputation with R (Lovelace, Nowosad, -and Meunchow 2019) is a good place to start: - - -A more detailed resource on R for transport planning is Lovelace and -Ellison (2018). 
- -For an introduction to data science with R, see Grolemund and Wickham -(2016). - -## Another test of R and RStudio - -As another test, try running the following commands from RStudio (which -should result in a -map): - - - -``` r -library(stats19) -library(tidyverse) -library(tmap) # installed alongside mapview -crashes = get_stats19(year = 2017, type = "ac") -crashes_iow = crashes %>% - filter(local_authority_district == "Isle of Wight") %>% - format_sf() - -# basic plot -plot(crashes_iow) -``` - -You should see results like those shown in the map here: - - -If you cannot create that map by running the code above before the -course, get in touch with us, e.g. by writing a comment under that -github issue page (Note: You will need a github account). - -For an online version of these instructions see here: - - -## References - -
- -
- -Grolemund, Garrett, and Hadley Wickham. 2016. *R for Data Science*. -O’Reilly Media. - -
- -
- -Lovelace, Robin, and Richard Ellison. 2018. “Stplanr: A Package for -Transport Planning.” *The R Journal* 10 (2): 7–23. -. - -
- -
- -Lovelace, Robin, Jakub Nowosad, and Jannes Meunchow. 2019. -*Geocomputation with R*. CRC Press. . - -
- -
diff --git a/courses/week1-announcement.Rmd b/courses/week1-announcement.Rmd deleted file mode 100644 index d8caa1e..0000000 --- a/courses/week1-announcement.Rmd +++ /dev/null @@ -1,37 +0,0 @@ - - -You can access a .ics file to see times and locations of all the lectures and practicals in an online calendar such as Outlook (recommended, the University of Leeds system) or Google Calendar by downloading this file and importing it to your online calendar: https://github.com/ITSLeeds/TDS/releases/download/0.20.1/tds-timetable.ics - -It should look something like this: - -![](https://user-images.githubusercontent.com/1825120/73278079-f827e580-41e2-11ea-9e91-38bbd8545402.png) - -There were a few questions. - -- In terms of 'office hours' 1 hour after the practical or lecture sessions are best, I have some time available on Thursday mornings also, see my calendar [here](https://outlook.office365.com/owa/calendar/63f6c4e85d124df6a20656ade8e71faa@leeds.ac.uk/32e1cb4137f4414b8d7644453ec4b10414316826143036893453/calendar.html). -- The practical sessions are 3 hours, not 2.5 hours. However, not all of that will have contact time from me. - -- To install recent versions of R and RStudio, with the packages you'll need for the course, please follow instructions here: https://docs.ropensci.org/stats19/articles/stats19-training-setup.html - -To check your installation is working, try running the following commands: - -```{r testcode, message=FALSE, warning=FALSE} -library(stats19) -library(tidyverse) -library(tmap) # installed alongside mapview -crashes = get_stats19(year = 2017, type = "ac") -crashes_iow = crashes %>% - filter(local_authority_district == "Isle of Wight") %>% - format_sf() - -# basic plot -plot(crashes_iow) -``` - -You should see a plot like that shown above. 
- -The slides from the lecture can be found here: https://itsleeds.github.io/TDS/slides/1-intro.html#1 - -Note the links to the essential reading list: https://github.com/ITSLeeds/TDS/blob/master/catalogue.md#essential - -Bonus: if you want to see how I generated the timetable, try reproducing this script: https://github.com/ITSLeeds/TDS/blob/master/code-r/timetable2.R \ No newline at end of file diff --git a/coursework-template.Rmd b/coursework-template.Rmd deleted file mode 100644 index 72cc105..0000000 --- a/coursework-template.Rmd +++ /dev/null @@ -1,216 +0,0 @@ ---- -title: "Coursework submission for Transport Data Science (TRAN5340M)" -subtitle: "Enter your own title here (e.g. Exploring open transport data: a study of the Isle of Wight)" -output: github_document -# output: -# pdf_document: -# number_sections: true -author: "Student 12345" -# for html output: -# output: -# html_document: -# number_sections: true -# for pdf output: -bibliography: references.bib ---- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, eval = TRUE) -``` - -# Introduction - -This template contains information and suggested headings for the TDS module. -Do not submit coursework that contains this note or any text (other than the headings) from this template. -It is just designed to get you started. You will submit the .Rmd file, optionally included in a .zip file if you want to include data, and the resulting PDF document as your coursework submission. - -As outlined in the module catalogue, the coursework should be: - -- A maximum of 3000 words long, excluding code, figure captions and references -- A maximum of 10 pages long, excluding references and appendices (you should include your best work and think about maximising the use of space - the chunk option `out.width="50%"`, for example, can help with this as outlined [here](https://bookdown.org/yihui/bookdown/figures.html) ) - - -## RMarkdown - -This is an R Markdown file. 
-You can set the output by changing `output: github_document` to something different, like `output: html_document`. -You will need to submit your work as a pdf document, which can be generated by converting html output to pdf (e.g. with the `pagedown` package) or (recommended) by setting the output to `pdf_document`. -The first lines of your RMarkdown document could look something like this to ensure that the output is a PDF document and that the R code does not run (set `eval = TRUE` to make the R code run): - -``` ---- -title: "Coursework submission for Transport Data Science (TRAN5340M)" -subtitle: "Enter your own title here (e.g. Exploring open transport data: a study of the Isle of Wight)" -output: - pdf_document: - number_sections: true -author: "Student 12345" -bibliography: references.bib ---- - -``` - -```{r, eval=FALSE} -knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, eval = FALSE) -``` - - - -See here for more info: https://rmarkdown.rstudio.com/lesson-2.html - - -When you open this file in RStudio and click the **Knit** button all R code chunks are run and a markdown file (.md) suitable for publishing to GitHub is generated. - -To ensure the document is reproducible, you should include a code chunk that shows which packages you used, e.g.: - -```{r, message=FALSE} -# install.packages("remotes") -remotes::install_github("itsleeds/pct") -remotes::install_github("itsleeds/geofabrik") -remotes::install_github("ropensci/stats19") -library(pct) -library(sf) -library(stplanr) -library(tidyverse) -library(tmap) -``` - -You can add references manually or with `[@citation-key]` references linking to a .bib file like this[@lovelace_stplanr_2017]. -And this [@fox_data_2018]. 
- - -## Including Code - -You can include R code in the document as follows: - -```{r cars} -summary(cars) -``` - -## Including Plots - -You can also embed plots, for example: - -```{r pressure, echo=FALSE} -plot(pressure) -``` - -Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot. - -# Datasets used - -You can get zone, OD and even route data for any city in the UK with the following commands. -We got data for the Isle of Wight with the following commands: - -```{r, message=FALSE} -library(pct) -region_name = "isle-of-wight" -z = get_pct_zones(region = region_name) -od = get_od() -od_in_zones = od %>% - filter(geo_code1 %in% z$geo_code) %>% - filter(geo_code2 %in% z$geo_code) -desire_lines = od2line(od_in_zones, z) -``` - - -You could get data from OpenStreetMap with the `osmdata` package. - -```{r, eval=FALSE} -library(osmdata) -osm_data = opq("isle of wight") %>% - add_osm_feature(key = "highway", value = "primary") %>% - osmdata_sf() -``` - -```{r, echo=FALSE} -# to use pre-saved data online you can do something like this: -# saveRDS(osm_data, "osm_data_highways_primary.Rds") -# piggyback::pb_upload("osm_data_highways_primary.Rds") -# piggyback::pb_download_url("osm_data_highways_primary.Rds") -u = "https://github.com/ITSLeeds/TDS/releases/download/0.20.1/osm_data_highways_primary.Rds" -osm_data = readRDS(url(u)) -``` - -You can get large OSM datasets with `geofabrik`: - -```{r} -library(geofabrik) -iow_highways = get_geofabrik(name = "Isle of Wight", layer = "lines") -summary(as.factor(iow_highways$highway)) -iow_highways2 = iow_highways %>% - filter(!is.na(highway)) %>% - filter(!str_detect(string = highway, pattern = "primary|track|resi|service|foot")) -summary(as.factor(iow_highways2$highway)) -``` - - -You could get road casualty data with the `stats19` pakckage, as shown below. 
- -```{r} -crashes = stats19::get_stats19(year = 2018, output_format = "sf") %>% - sf::st_transform(crs = sf::st_crs(z)) - -crashes_in_region = crashes[z, ] -tm_shape(z) + - tm_fill("car_driver", palette = "Greys") + - tm_shape(iow_highways2) + - tm_lines(col = "green", lwd = 2) + - tm_shape(osm_data$osm_lines) + - tm_lines(col = "maxspeed", lwd = 5) + - tm_shape(crashes_in_region) + - tm_dots(size = 0.5, col = "red") -``` - -# Descriptive analysis - -```{r} -plot(desire_lines) -``` - -# Route analysis - -See [here](https://geocompr.robinlovelace.net/transport.html#routes) and [here](https://www.r-spatial.org/r/2019/09/26/spatial-networks.html) for details. - -```{r, echo=FALSE} -# devtools::install_github("luukvdmeer/sfnetworks") -``` - - -```{r} -sln = SpatialLinesNetwork(iow_highways2) -sln_clean = sln_clean_graph(sln) -plot(sln_clean@sl$`_ogr_geometry_`) -centrality = igraph::edge_betweenness(sln_clean@g) -centrality_normalised = centrality / mean(centrality) -``` - -```{r} -mapview::mapview(z) + - mapview::mapview(sln_clean@sl, lwd = centrality_normalised * 3, zcol = "maxspeed") -``` - - - - - -# Additional datasets - -# Policy analysis - -Here you could explain how you explored answers to policy questions such as: - -- how to make the roads safer? -- how to reduce congestion? -- where to build bike parking? - -# Discussion - -Include here limitations and ideas for further research. - -# Conclusion - -What are the main things we have learned from this project? - -# References - diff --git a/coursework-template.md b/coursework-template.md deleted file mode 100644 index 8574a66..0000000 --- a/coursework-template.md +++ /dev/null @@ -1,247 +0,0 @@ -Coursework submission for Transport Data Science (TRAN5340M) -================ -Student 12345 - -# Introduction - -This template contains information and suggested headings for the TDS -module. Do not submit coursework that contains this note or any text -(other than the headings) from this template. 
It is just designed to get -you started. You will submit the .Rmd file, optionally included in a -.zip file if you want to include data, and the resulting PDF document as -your coursework submission. - -As outlined in the module catalogue, the coursework should be: - - - A maximum of 3000 words long, excluding code, figure captions and - references - - A maximum of 10 pages long, excluding references and appendices (you - should include your best work and think about maximising the use of - space - the chunk option `out.width="50%"`, for example, can help - with this as outlined - [here](https://bookdown.org/yihui/bookdown/figures.html) ) - -## RMarkdown - -This is an R Markdown file. You can set the output by changing `output: -github_document` to something different, like `output: html_document`. -You will need to submit your work as a pdf document, which can be -generated by converting html output to pdf (e.g. with the `pagedown` -package) or (recommended) by setting the output to `pdf_document`. The -first lines of your RMarkdown document could look something like this to -ensure that the output is a PDF document and that the R code does not -run (set `eval = TRUE` to make the R code run): - - --- - title: "Coursework submission for Transport Data Science (TRAN5340M)" - subtitle: "Enter your own title here (e.g. Exploring open transport data: a study of the Isle of Wight)" - output: - pdf_document: - number_sections: true - author: "Student 12345" - bibliography: references.bib - --- - -``` r -knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, eval = FALSE) -``` - -See here for more info: - -When you open this file in RStudio and click the **Knit** button all R -code chunks are run and a markdown file (.md) suitable for publishing to -GitHub is generated. 
- -To ensure the document is reproducible, you should include a code chunk -that shows which packages you used, e.g.: - -``` r -# install.packages("remotes") -remotes::install_github("itsleeds/pct") -remotes::install_github("itsleeds/geofabrik") -remotes::install_github("ropensci/stats19") -library(pct) -library(sf) -library(stplanr) -library(tidyverse) -library(tmap) -``` - -You can add references manually or with `[@citation-key]` references -linking to a .bib file like this(Lovelace and Ellison 2017). And this -(Fox 2018). - -## Including Code - -You can include R code in the document as follows: - -``` r -summary(cars) -``` - - ## speed dist - ## Min. : 4.0 Min. : 2.00 - ## 1st Qu.:12.0 1st Qu.: 26.00 - ## Median :15.0 Median : 36.00 - ## Mean :15.4 Mean : 42.98 - ## 3rd Qu.:19.0 3rd Qu.: 56.00 - ## Max. :25.0 Max. :120.00 - -## Including Plots - -You can also embed plots, for example: - -![](coursework-template_files/figure-gfm/pressure-1.png) - -Note that the `echo = FALSE` parameter was added to the code chunk to -prevent printing of the R code that generated the plot. - -# Datasets used - -You can get zone, OD and even route data for any city in the UK with the -following commands. We got data for the Isle of Wight with the following -commands: - -``` r -library(pct) -region_name = "isle-of-wight" -z = get_pct_zones(region = region_name) -od = get_od() -od_in_zones = od %>% - filter(geo_code1 %in% z$geo_code) %>% - filter(geo_code2 %in% z$geo_code) -desire_lines = od2line(od_in_zones, z) -``` - -You could get data from OpenStreetMap with the `osmdata` package. 
- -``` r -library(osmdata) -osm_data = opq("isle of wight") %>% - add_osm_feature(key = "highway", value = "primary") %>% - osmdata_sf() -``` - -You can get large OSM datasets with `geofabrik`: - -``` r -library(geofabrik) -iow_highways = get_geofabrik(name = "Isle of Wight", layer = "lines") -summary(as.factor(iow_highways$highway)) -``` - - ## bridleway construction cycleway footway living_street - ## 170 16 139 5436 3 - ## path pedestrian primary primary_link proposed - ## 370 18 548 18 12 - ## residential secondary secondary_link service steps - ## 2400 370 1 6746 368 - ## tertiary tertiary_link track unclassified NA's - ## 467 3 4312 835 22555 - -``` r -iow_highways2 = iow_highways %>% - filter(!is.na(highway)) %>% - filter(!str_detect(string = highway, pattern = "primary|track|resi|service|foot")) -summary(as.factor(iow_highways2$highway)) -``` - - ## bridleway construction cycleway living_street path - ## 170 16 139 3 370 - ## pedestrian proposed secondary secondary_link steps - ## 18 12 370 1 368 - ## tertiary tertiary_link unclassified - ## 467 3 835 - -You could get road casualty data with the `stats19` pakckage, as shown -below. - -``` r -crashes = stats19::get_stats19(year = 2018, output_format = "sf") %>% - sf::st_transform(crs = sf::st_crs(z)) - -crashes_in_region = crashes[z, ] -tm_shape(z) + - tm_fill("car_driver", palette = "Greys") + - tm_shape(iow_highways2) + - tm_lines(col = "green", lwd = 2) + - tm_shape(osm_data$osm_lines) + - tm_lines(col = "maxspeed", lwd = 5) + - tm_shape(crashes_in_region) + - tm_dots(size = 0.5, col = "red") -``` - -![](coursework-template_files/figure-gfm/unnamed-chunk-7-1.png) - -# Descriptive analysis - -``` r -plot(desire_lines) -``` - -![](coursework-template_files/figure-gfm/unnamed-chunk-8-1.png) - -# Route analysis - -See [here](https://geocompr.robinlovelace.net/transport.html#routes) and -[here](https://www.r-spatial.org/r/2019/09/26/spatial-networks.html) for -details. 
- -``` r -sln = SpatialLinesNetwork(iow_highways2) -sln_clean = sln_clean_graph(sln) -plot(sln_clean@sl$`_ogr_geometry_`) -``` - -![](coursework-template_files/figure-gfm/unnamed-chunk-10-1.png) - -``` r -centrality = igraph::edge_betweenness(sln_clean@g) -centrality_normalised = centrality / mean(centrality) -``` - -``` r -mapview::mapview(z) + - mapview::mapview(sln_clean@sl, lwd = centrality_normalised * 3, zcol = "maxspeed") -``` - -![](coursework-template_files/figure-gfm/unnamed-chunk-11-1.png) - -# Additional datasets - -# Policy analysis - -Here you could explain how you explored answers to policy questions such -as: - - - how to make the roads safer? - - how to reduce congestion? - - where to build bike parking? - -# Discussion - -Include here limitations and ideas for further research. - -# Conclusion - -What are the main things we have learned from this project? - -# References - -
- -
- -Fox, Charles. 2018. *Data Science for Transport: A Self-Study Guide with -Computer Exercises*. 1st ed. 2018 edition. New York, NY: Springer. - -
- -
- -Lovelace, Robin, and Richard Ellison. 2017. “Stplanr: A Package for -Transport Planning.” *The R Journal*. - -
- -
diff --git a/coursework-template_files/figure-gfm/pressure-1.png b/coursework-template_files/figure-gfm/pressure-1.png deleted file mode 100644 index 148a276..0000000 Binary files a/coursework-template_files/figure-gfm/pressure-1.png and /dev/null differ diff --git a/coursework-template_files/figure-gfm/unnamed-chunk-12-1.png b/coursework-template_files/figure-gfm/unnamed-chunk-12-1.png deleted file mode 100644 index 4d40b72..0000000 Binary files a/coursework-template_files/figure-gfm/unnamed-chunk-12-1.png and /dev/null differ diff --git a/demos/2-pager/2-pager.bib b/demos/2-pager/2-pager.bib deleted file mode 100644 index 0fd76ff..0000000 --- a/demos/2-pager/2-pager.bib +++ /dev/null @@ -1,38 +0,0 @@ - -@book{pereira2023, - title = {Introduction to urban accessibility: a practical guide with R}, - author = {Pereira, Rafael H. M. and Herszenhut, Daniel}, - year = {2023}, - date = {2023}, - url = {https://ipeagit.github.io/intro_access_book}, - note = {DOI: 10.38116/9786556350547}, - langid = {en} -} - -@article{lovelace2021, - title = {Open source tools for geographic analysis in transport planning}, - author = {Lovelace, Robin}, - year = {2021}, - month = {01}, - date = {2021-01-16}, - journal = {Journal of Geographical Systems}, - doi = {10.1007/s10109-020-00342-2}, - url = {https://doi.org/10.1007/s10109-020-00342-2}, - note = {Citation Key: lovelace{\_}open{\_}2021}, - langid = {en} -} - -@article{lovelace2017, - title = {The Propensity to Cycle Tool: An open source online system for sustainable transport planning}, - author = {Lovelace, Robin and Goodman, Anna and Aldred, Rachel and Berkoff, Nikolai and Abbas, Ali and Woodcock, James}, - year = {2017}, - month = {01}, - date = {2017-01-01}, - journal = {Journal of Transport and Land Use}, - volume = {10}, - number = {1}, - doi = {10.5198/jtlu.2016.862}, - url = {https://doi.org/10.5198/jtlu.2016.862}, - note = {Citation Key: lovelace{\_}propensity{\_}2017}, - langid = {en} -} diff --git 
a/demos/2-pager/2-pager.qmd b/demos/2-pager/2-pager.qmd deleted file mode 100644 index ea93e53..0000000 --- a/demos/2-pager/2-pager.qmd +++ /dev/null @@ -1,98 +0,0 @@ ---- -format: pdf -title: "Transport Data Science Coursework Plan" -bibliography: 2-pager.bib ---- - -```{=html} - -``` -# Title - -The draft title of my project is Exploring the Accessibility of Parks for Wombats in Melbourne. - -\[To be confirmed\] - -# Research questions considered and motivations - -Wombats are a key species - -# Prior work - -@pereira2023 provides an introduction to the field and guidance on accessibility analysis. - -@lovelace2021 provides an overview of existing tools for wombat accessibility analysis. - -# Input data - -I will use routes from the Propensity to Cycle Tool project as the main input dataset. - -@lovelace2017 describes these datasets in detail. - -See - -And - -# Questions for the course team - -Is the input dataset and research question too ambitious? - -I was wondering about using regression with the `xgboost` package. -I have tried it and managed to get some results shown below. - -# Analysis plan - -I have 4 full weeks to complete the 10 page report, in that time I will - -- Week 1: complete to introduction to the report and references, write data cleaning code and describe the input dataset - -- ... - -- ... - -- ... - -# Importing the data and initial exploration - -See below for an initial exploration of the data - -```{r} -#| include: false -library(stplanr) -library(tidyverse) -routes_imported = routes_fast_sf -names(routes_imported) -desire_lines = flowlines_sf -desire_lines$id = 1:nrow(desire_lines) -routes_imported$id = 1:nrow(routes_imported) -routes_joined = left_join( - routes_imported, - desire_lines |> sf::st_drop_geometry() -) -``` - -We aggregated the data with the `overline()` function from the `stplanr` package, resulting in @fig-routes (right). 
- -```{r} -#| label: fig-routes -#| layout-ncol: 2 -#| fig-cap: "Routes in Melbourne, Australia" -#| fig-subcap: "Left: raw data; right: aggregated data" -routes_joined |> - select(Bicycle) |> - plot() -routes_overline = overline(routes_joined, "Bicycle") -plot(routes_overline) -``` - -# References \ No newline at end of file diff --git a/demos/demo-quarto-document.md b/demos/demo-quarto-document.md deleted file mode 100644 index 7084e85..0000000 --- a/demos/demo-quarto-document.md +++ /dev/null @@ -1,39 +0,0 @@ -# The impact of lockdowns on collisions - - -# Introduction - -This document explores the impact of lockdowns on the rate of -collisions. - -# Data and methods - -Datasets were taken from the STATS19 database and processed with the -`stats19` R package (Lovelace et al. 2019). - -# Results - -See the results in -Figure 1. - - - -# Conclusion - -# References - -
- -
- -Lovelace, Robin, Malcolm Morgan, Layik Hama, and Mark Padgham. 2019. -“Stats19 a Package for Working with Open Road Crash Data.” *Journal of -Open Source Software* 4 (33): 1181. -. - -
- -
diff --git a/demos/demo-quarto-document.qmd b/demos/demo-quarto-document.qmd deleted file mode 100644 index a84b1a4..0000000 --- a/demos/demo-quarto-document.qmd +++ /dev/null @@ -1,104 +0,0 @@ ---- -format: pdf -# Try replacing the above with this for PDF output -# format: pdf -title: "The impact of lockdowns on collisions" -number-sections: true -bibliography: references.bib -execute: - cache: refresh ---- - -# Introduction - -This document explores the impact of lockdowns on the rate of collisions. - -# Data and methods - -Datasets were taken from the STATS19 database and processed with the `stats19` R package [@lovelace2019]. - -# Results - -```{r} -#| include: false -library(tidyverse) -library(stats19) -# dl_stats19(year = 2020, type = "collision") -collisions_2020 = get_stats19(year = 2020, type = "collision") -``` - -See the results of downloading national crash data from 2020 in @fig-crashes-per-day. - -```{r} -#| label: fig-crashes-per-day -#| echo: false -#| fig-cap: "Crashes over time in 2020" -collisions_per_day = collisions_2020 |> - group_by(date) |> - summarise( - n = n() - ) -collisions_per_day |> - ggplot(aes(date, n)) + - geom_line(colour = "red") -``` - -The equivalent trend for Leeds is shown in @fig-trend-leeds. - -The column names available to us are: - -```{r} -names(collisions_2020) -``` - - -```{r} -#| echo: false -#| label: fig-trend-leeds -# Check the local authority of crashes: -collisions_wy = collisions_2020 |> - filter(police_force == "West Yorkshire") -collisions_per_day_wy = collisions_wy |> - group_by(date) |> - summarise( - n = n() - ) -collisions_per_day_wy |> - ggplot(aes(date, n)) + - geom_line(colour = "red") - - -``` - -Let's put them side-by-side. 
- -```{r} -# collisions_per_day_combined = bind_rows( -# collisions_per_day |> mutate(Area = "GB"), -# collisions_per_day_wy |> mutate(Area = "West Yorksire") -# ) -# # Not sure why the scales here do not work: -# collisions_per_day_combined |> -# ggplot(aes(date, n)) + -# geom_line() + -# facet_grid(~Area, shrink = TRUE) -g1 = collisions_per_day |> - ggplot(aes(date, n)) + - geom_line(colour = "red") -g2 = collisions_per_day_wy |> - ggplot(aes(date, n)) + - geom_line(colour = "red") -library(patchwork) -``` - -```{r} -#| echo: false -#| layout-ncol: 2 -g1 -g2 -``` - - -# Conclusion - -# References diff --git a/demos/demo-quarto-document.rmarkdown b/demos/demo-quarto-document.rmarkdown deleted file mode 100644 index a761848..0000000 --- a/demos/demo-quarto-document.rmarkdown +++ /dev/null @@ -1,113 +0,0 @@ ---- -format: pdf -# Try replacing the above with this for PDF output -# format: pdf -title: "The impact of lockdowns on collisions" -number-sections: true -bibliography: references.bib -execute: - cache: refresh ---- - - -# Introduction - -This document explores the impact of lockdowns on the rate of collisions. - -# Data and methods - -Datasets were taken from the STATS19 database and processed with the `stats19` R package [@lovelace2019]. - -# Results - - -```{r} -#| include: false -library(tidyverse) -library(stats19) -# dl_stats19(year = 2020, type = "collision") -collisions_2020 = get_stats19(year = 2020, type = "collision") -``` - - -See the results of downloading national crash data from 2020 in @fig-crashes-per-day. - - -```{r} -#| label: fig-crashes-per-day -#| echo: false -#| fig-cap: "Crashes over time in 2020" -collisions_per_day = collisions_2020 |> - group_by(date) |> - summarise( - n = n() - ) -collisions_per_day |> - ggplot(aes(date, n)) + - geom_line(colour = "red") -``` - - -The equivalent trend for Leeds is shown in @fig-trend-leeds. 
- -The column names available to us are: - - -```{r} -names(collisions_2020) -``` - -```{r} -#| echo: false -#| label: fig-trend-leeds -# Check the local authority of crashes: -collisions_wy = collisions_2020 |> - filter(police_force == "West Yorkshire") -collisions_per_day_wy = collisions_wy |> - group_by(date) |> - summarise( - n = n() - ) -collisions_per_day_wy |> - ggplot(aes(date, n)) + - geom_line(colour = "red") - - -``` - - -Let's put them side-by-side. - - -```{r} -# collisions_per_day_combined = bind_rows( -# collisions_per_day |> mutate(Area = "GB"), -# collisions_per_day_wy |> mutate(Area = "West Yorksire") -# ) -# # Not sure why the scales here do not work: -# collisions_per_day_combined |> -# ggplot(aes(date, n)) + -# geom_line() + -# facet_grid(~Area, shrink = TRUE) -g1 = collisions_per_day |> - ggplot(aes(date, n)) + - geom_line(colour = "red") -g2 = collisions_per_day_wy |> - ggplot(aes(date, n)) + - geom_line(colour = "red") -library(patchwork) -``` - -```{r} -#| echo: false -#| layout-ncol: 2 -g1 -g2 -``` - - - -# Conclusion - -# References - diff --git a/demos/references.bib b/demos/references.bib deleted file mode 100644 index 11d2a7b..0000000 --- a/demos/references.bib +++ /dev/null @@ -1,16 +0,0 @@ - -@article{lovelace2019, - title = {stats19 A package for working with open road crash data}, - author = {Lovelace, Robin and Morgan, Malcolm and Hama, Layik and Padgham, Mark}, - year = {2019}, - date = {2019}, - journal = {Journal of Open Source Software}, - pages = {1181}, - volume = {4}, - number = {33}, - doi = {10.21105/joss.01181}, - url = {http://doi.org/10.21105/joss.01181}, - note = {Citation Key Alias: stats192019 -tex.publisher: [object Object] -Citation Key: lovelace{\_}stats19{\_}2019} -} diff --git a/deploy.sh b/deploy.sh deleted file mode 100755 index 223f4a0..0000000 --- a/deploy.sh +++ /dev/null @@ -1,18 +0,0 @@ -gh repo clone itsleeds/TDS -cd TDS -git checkout gh-pages -ls # check contents -cd .. 
- -cp -Rv slides/* TDS/slides/ -cp -Rv practicals/* TDS/practicals/ - -cd TDS -git status -# git diff -git add -A -git commit -am 'update site' -git push origin gh-pages -git clean -f - -cd .. diff --git a/dissertation-ideas/cycleway-crash-risk/cycleway-stats19-tz-rl.Rmd b/dissertation-ideas/cycleway-crash-risk/cycleway-stats19-tz-rl.Rmd deleted file mode 100644 index 2b62b8a..0000000 --- a/dissertation-ideas/cycleway-crash-risk/cycleway-stats19-tz-rl.Rmd +++ /dev/null @@ -1,249 +0,0 @@ ---- -title: "stats19-locations" -# output: bookdown::pdf_document2 -bibliography: - - saferactive.bib - - my-bib.bib ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>", - eval = FALSE, - echo = FALSE, - message = FALSE, - warning = FALSE -) -# load packages -library(stats19) -library(tidyverse) -library(tmap) -``` - -# Introduction - -I suggest taking a look at and perhaps citing these papers and reports: - -[@aldred_cycling_2018] - -[@ding_roles_2020] - -[@feleke_comparative_2018] - -[@grolemund_data_2016] - -See here for more info: https://www.zotero.org/groups/418217/energy-and-transport/collections/9PTF9F44/item-list - - - -See here and in the .bib files for more citations: - - - - -# Descriptive data analysis - - -## Case study of Leeds-Bradford Cycle Superhighway - -A case study of a well-known cycleway in Leeds is shown in Figure \@ref(fig:cycleway1). - -```{r, eval=TRUE, fig.cap="Illustration of the Leeds-Bradford cycle superhighway. Source: OpenStreetMap.", fig.width=9} -# read in saved dataset -u = "https://github.com/ropensci/stats19/releases/download/1.1.0/superhighway.Rds" -superhighway = readRDS(url(u)) -plot(superhighway$geometry) -# tm_shape(superhighway) + -# tm_lines("highway") + -# tm_layout(asp = 5, frame = FALSE) -# + -# tm_layout(legend.outside = TRUE) -``` - - -## Cycleway data - -## Crash data - -## Cycling behaviour data - -[@lovelace_propensity_2017] - -## Explanatory variables - -E.g. 
speed from OSM data and number of junctions - -presence/absence of cycleway - -# Modelling crash rates on cycleways - - -# Discussion - -## Limitations - -## Policy relevance - -## Further research - - -# Conclusions - -```{r setup, eval=TRUE} -crashes = get_stats19(2012:2018, output_format = "sf") -#2018 -# fail use map_dfr... -# challenge: get more years -casualties_2018 = get_stats19(year = 2018, type = "cas") -casualties_cyclist_2018 = casualties_2018 %>% - filter(casualty_type == "Cyclist") -crashes_cyclist_2018 = crashes %>% - filter(accident_index %in% casualties_cyclist_2018$accident_index) -crashes_cyclist_2018 = sf::st_transform(crashes_cyclist_2018, 4326) -crashes_near_superhighway_2018 = crashes_cyclist_2018[superhighway_buffer, ] - -#total -casualties = get_stats19(2012:2018, type = "cas") -casualties_cyclist = casualties %>% - filter(casualty_type == "Cyclist") -crashes_cyclist = crashes %>% - filter(accident_index %in% casualties_cyclist$accident_index) -``` - -## OpenStreetMap data - -```{r} -library(sf) -library(osmdata) -data_osm = opq("leeds") %>% - add_osm_feature(key = "name", value = "Cycle Superhighway 1") %>% - osmdata_sf() -data_osm -plot(data_osm$osm_lines$geometry) -wy = pct::get_pct_zones("west-yorkshire") -superhighway = data_osm$osm_lines[wy, ] -mapview::mapview(superhighway) -saveRDS(superhighway, "superhighway.Rds") -piggyback::pb_upload("superhighway.Rds", repo = "ropensci/stats19") -piggyback::pb_download_url("superhighway.Rds", repo = "ropensci/stats19") -``` - -```{r} -# get buffer -# the distance here use 20 which will not include other routes data but won't miss the junction data. 
-superhighway_buffer = stplanr::geo_buffer(superhighway, dist = 20) %>% - sf::st_union() -mapview::mapview(superhighway_buffer) -crashes_cyclist = sf::st_transform(crashes_cyclist, 4326) -crashes_near_superhighway = crashes_cyclist[superhighway_buffer, ] -mapview::mapview(crashes_near_superhighway) -summary(crashes_near_superhighway$datetime) -table(crashes_near_superhighway$accident_severity) -``` - -## Questions - -1. How to get casualties for more years, e.g. 2012 to 2018 (not just 2018)? - -I am not quite sure why the script code "casualties = get_stats19(2012:2018, type = "cas")" doesn't works. I run this code and get 56 observations in the end. "map_dfr" seems return data frames created by row-binding and column-binding respectively, so I first thought it may fail because different variable number (some years have 15 variables and some 16). But it turns out the NA will fill in the lack variable. - - -2. How to estimate the risk per year? -This raises another question, how to estimate the exposure near the cycleway? - -To avoid the "safety in number", the cyclist flow should be considered. On different segments of the road. The bicycle number varys from 0 to 387, it leads the exposure data varys. So it is better to consider separately for each crash location than treat the superhighway equally risky. - -Because the speed limit does not differ a lot on the entire road, the distance is the other factor of exposure. The exposure equation is multiply cycling volume per yearby road segment length (in vehicle-kilometers, VKM). The risk equals the crash number per year divide by exposure. The crash number could separates KSI from slight injuries. 
- -```{r} -#exposure data of the crash point -rnet_wy_cycling = pct::get_pct_rnet(region = "west-yorkshire") -plot(rnet_wy_cycling$geometry) -rnet_cycleway = sf::st_intersection(rnet_wy_cycling, superhighway_buffer) -plot(rnet_cycleway) -mapview::mapview(rnet_cycleway) - -#exposure of the crash point -test1<-crashes_near_superhighway -test2<-rnet_cycleway - -#if the crash geometry located in the segments. -mat = st_intersects(test1$geometry, test2$geometry, sparse = FALSE) -apply(mat, 1, any) -#all in false maybe because of the geometry deviation (not exactly located). -#alter to st_distance function. It calculates the distance matrix in meters using Great Circle distance. -#treat the crashes happen in the closed segement. -#The most minimum distances are controlled under 50m and the biggest one is 105m. -d<-data.frame(st_distance(test1$geometry,test2$geometry)) -nn.dist <- apply(d, 1, function(x) { - return(sort(x, partial = 1)[1]) -}) -nn.index <- apply(d, 1, function(x) {order(x, decreasing=F)[1]}) -newdata <- cbind(test1, test2[nn.index,], apply(d, 1, function(x) sort(x, decreasing=F)[2])) - -newdata$length_km = - as.numeric(sf::st_length(newdata$geometry.1)) -newdata$km_cycle_peryear = newdata$length_km * newdata$bicycle*2*260 - -nrow(newdata) / - (sum(newdata$km_cycle_peryear) / 1e9) - -``` - - - -How to estimate how numbers cycling changes per year (difficult, not essential to answer)? - -```{r} -rnet_wy_cycling = pct::get_pct_rnet(region = "west-yorkshire") -plot(rnet_wy_cycling$geometry) -rnet_cycleway = sf::st_intersection(rnet_wy_cycling, superhighway_buffer) -plot(rnet_cycleway) -mapview::mapview(rnet_cycleway) -# how many cycle journey segments in total? -sum(rnet_cycleway$bicycle) -# estimate million km cycled per year... 
-rnet_cycleway$length_km = - as.numeric(sf::st_length(rnet_cycleway)) / 1000 -summary(rnet_cycleway$length_km) -# estimate km cycled per working morning -rnet_cycleway$km_per_morning_workday = - rnet_cycleway$length_km * rnet_cycleway$bicycle -sum(rnet_cycleway$km_per_morning_workday) -rnet_cycleway$km_per_year = - rnet_cycleway$km_per_morning_workday * 2 * 200 -sum(rnet_cycleway$km_per_year) -# estimate risk - total crashes per bkm -nrow(crashes_near_superhighway) / - (sum(rnet_cycleway$km_per_year) / 1e9) -# estimate risk - total crashes per bkm -1 / - (sum(rnet_cycleway$km_per_year) / 1e9) -``` - -```{r} -remotes::install_github("itsleeds/osmextractr") -library(osmextractr) -wy_osm = oe_get(place = "West Yorkshire", extra_attributes = c("maxspeed")) -pryr::object_size(wy_osm) -wy_osm_cycleway = wy_osm[superhighway_buffer, , op = sf::st_within] -mapview::mapview(wy_osm_cycleway["highway"]) -wy_osm_cycleway$speed = as.character(wy_osm_cycleway$maxspeed) -table(wy_osm_cycleway$speed) -wy_osm_cycleway$speed_numeric = as.numeric(gsub(pattern = " mph", replacement = "", wy_osm_cycleway$maxspeed)) -table(wy_osm_cycleway$speed_numeric) -mean(wy_osm_cycleway$speed_numeric, na.rm = TRUE) # the average speed limit on the cycleway -table(wy_osm_cycleway$highway) -# calculate percentage on primary: -sum(wy_osm_cycleway$highway == "primary", na.rm = TRUE) / - sum(!is.na(wy_osm_cycleway$highway)) -# 0.1034483 -# calculate percentage on primary: -sum(wy_osm_cycleway$highway == "secondary", na.rm = TRUE) / - sum(!is.na(wy_osm_cycleway$highway)) -``` - - -# References - diff --git a/docker-notes.md b/docker-notes.md deleted file mode 100644 index 49d3f3e..0000000 --- a/docker-notes.md +++ /dev/null @@ -1,38 +0,0 @@ -## Docker - -If you want to run the software in a container (which can make package installation easier), you can use docker, which allows you to run a virtual operating system inside your main operating system. 
- -After you have [installed docker](https://docs.docker.com/install/), you should be able to run the software by executing the following commands in a terminal such as Windows PowerShell or the default terminal on Linx and MAC operating systems. - -For an R installation: - -```bash -docker run -d -p 8787:8787 -v $(pwd):/home/rstudio/data -e USERID=$UID -e PASSWORD=pickASafePassWord --name rstudio robinlovelace/geocompr -``` - -For a R/Python docker image (bigger, less well maintained): - -```bash -docker run -d -p 8787:8787 -v $(pwd):/home/rstudio/data -e USERID=$UID -e PASSWORD=pickASafePassWord --name rstudio robinlovelace/tds -``` - -This will: - -- Pull the docker image from https://hub.docker.com/r/robinlovelace/tds/ or the geocompr repo if it's not already on your computer -- Launch a locally hosted instance of RStudio Server which can be accessed at http://localhost:8787/ -- Mount your current working dirctory to the data folder in the home directly of the docker image - -After navigating to http://localhost:8787/ in a browser you should see a login screen. Username and password are rstudio. See https://github.com/rocker-org/rocker/wiki/Using-the-RStudio-image for details. - -Once in the container you can use all the R packages. -To access the pre-installed Python packages you will need to enter the following commands: - -```bash -conda activate -python -``` - -to go into the Python shell. Form more on running Python in RStudio see [community.rstudio.com](https://community.rstudio.com/t/r-python-in-ide/279). -A demonstration showing the `tds` docker image in action is illustrated below. 
- -![](https://user-images.githubusercontent.com/1825120/43570979-a41791c2-9633-11e8-9edd-f3e11bc884c1.gif) \ No newline at end of file diff --git a/docker/Dockerfile b/docker/Dockerfile deleted file mode 100644 index 21f87fa..0000000 --- a/docker/Dockerfile +++ /dev/null @@ -1,23 +0,0 @@ -FROM robinlovelace/geocompr - -ENV PATH /opt/conda/bin:$PATH - -RUN apt-get update --fix-missing && \ - apt-get install -y wget bzip2 ca-certificates curl git && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-4.5.4-Linux-x86_64.sh -O ~/miniconda.sh && \ - /bin/bash ~/miniconda.sh -b -p /opt/conda && \ - rm ~/miniconda.sh && \ - /opt/conda/bin/conda clean -tipsy && \ - ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \ - echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \ - echo "conda activate base" >> ~/.bashrc - -RUN conda install numpy && \ - conda install geopandas && conda install scipy - -RUN conda install -c conda-forge osmnx - - diff --git a/docker/README.md b/docker/README.md deleted file mode 100644 index 49d3f3e..0000000 --- a/docker/README.md +++ /dev/null @@ -1,38 +0,0 @@ -## Docker - -If you want to run the software in a container (which can make package installation easier), you can use docker, which allows you to run a virtual operating system inside your main operating system. - -After you have [installed docker](https://docs.docker.com/install/), you should be able to run the software by executing the following commands in a terminal such as Windows PowerShell or the default terminal on Linx and MAC operating systems. 
- -For an R installation: - -```bash -docker run -d -p 8787:8787 -v $(pwd):/home/rstudio/data -e USERID=$UID -e PASSWORD=pickASafePassWord --name rstudio robinlovelace/geocompr -``` - -For a R/Python docker image (bigger, less well maintained): - -```bash -docker run -d -p 8787:8787 -v $(pwd):/home/rstudio/data -e USERID=$UID -e PASSWORD=pickASafePassWord --name rstudio robinlovelace/tds -``` - -This will: - -- Pull the docker image from https://hub.docker.com/r/robinlovelace/tds/ or the geocompr repo if it's not already on your computer -- Launch a locally hosted instance of RStudio Server which can be accessed at http://localhost:8787/ -- Mount your current working dirctory to the data folder in the home directly of the docker image - -After navigating to http://localhost:8787/ in a browser you should see a login screen. Username and password are rstudio. See https://github.com/rocker-org/rocker/wiki/Using-the-RStudio-image for details. - -Once in the container you can use all the R packages. -To access the pre-installed Python packages you will need to enter the following commands: - -```bash -conda activate -python -``` - -to go into the Python shell. Form more on running Python in RStudio see [community.rstudio.com](https://community.rstudio.com/t/r-python-in-ide/279). -A demonstration showing the `tds` docker image in action is illustrated below. 
- -![](https://user-images.githubusercontent.com/1825120/43570979-a41791c2-9633-11e8-9edd-f3e11bc884c1.gif) \ No newline at end of file diff --git a/docker/how_to_docker.md b/docker/how_to_docker.md deleted file mode 100644 index cddcfd9..0000000 --- a/docker/how_to_docker.md +++ /dev/null @@ -1,8 +0,0 @@ -# Setting up requirements.txt - - -- Fork over a repository -- Clone down the repository -- Create a requirements.txt file -- pip freeze > requirements.txt - - Should look similar to the samplerequirements.txt file within the Docker folder within the repository diff --git a/docker/sample_requirements.txt b/docker/sample_requirements.txt deleted file mode 100644 index 19c854c..0000000 --- a/docker/sample_requirements.txt +++ /dev/null @@ -1,33 +0,0 @@ -asgiref==3.5.0 -black==22.1.0 -certifi==2022.12.7 -cffi==1.15.0 -charset-normalizer==2.0.12 -click==8.0.3 -cryptography==36.0.1 -defusedxml==0.7.1 -distlib==0.3.4 -Django==3.1.14 -django-allauth==0.43.0 -django-crispy-forms==1.10.0 -django-debug-toolbar==3.2.1 -filelock==3.6.0 -idna==3.3 -mypy-extensions==0.4.3 -oauthlib==3.2.2 -pathspec==0.9.0 -platformdirs==2.4.1 -pycparser==2.21 -pygame==2.1.2 -PyJWT==2.4.0 -python3-openid==3.2.0 -pytz==2021.3 -requests==2.27.1 -requests-oauthlib==1.3.1 -six==1.16.0 -sqlparse==0.4.2 -tomli==2.0.0 -typing-extensions==4.0.1 -urllib3==1.26.8 -virtualenv==20.13.2 -whitenoise==5.2.0 diff --git a/index.qmd b/index.qmd new file mode 100644 index 0000000..9c05c7b --- /dev/null +++ b/index.qmd @@ -0,0 +1,54 @@ +--- +title: "Transport Data Science" +--- + +A module teaching how to use data science to solve transport problems. + +# Prerequisites + +## General computing prerequisites + +You should have the latest stable release of R (4.3.0 or above) and be comfortable with computing in general, for example creating folders, moving files, and installing software. + +We recommend installing this software on a computer with decent resources (e.g. a laptop with 8 GB of RAM). 
+ +## Data science experience prerequisites + +Prior experience of using R or Python (e.g. having used it for work, in previous degrees or having completed an online course) is essential. + +Students can demonstrate this by showing evidence that they have worked with R before, have completed an online course such as the first 4 sessions in the [RStudio Primers series](https://rstudio.cloud/learn/primers) or [DataCamp’s Free Introduction to R course](https://www.datacamp.com/courses/free-introduction-to-r). + +Evidence of substantial programming and data science experience in previous professional or academic work, in languages such as R or Python, also constitutes sufficient pre-requisite knowledge for the course. + +## Software + +Although you are free to use any software for the course, the emphasis on reproducibility means that popular and established data science languages R and Python are *highly* recommended. + +The teaching will be delivered primarily in R, with some Python code snippets and examples. +Unless you have a good reason to use Python, we recommend you use R for the course. + +### R software prerequisites +For this module you therefore need to have up-to-date versions of R and RStudio installed on a computer you have access to: + +- R from [cran.r-project.org](https://cran.r-project.org/) +- RStudio from [rstudio.com](https://rstudio.com/products/rstudio/download/#download) +- R packages, which can be installed by opening RStudio and typing `install.packages("stats19")` in the R console, for example. 
+- To install all the dependencies for the module, run the following command in the R console: + +```{r} +#| eval: false +if (!requireNamespace("remotes", quietly = TRUE)) { + install.packages("remotes") +} +remotes::install_github("itsleeds/tdstests") +``` + +See [Section 1.5 of the online guide Reproducible Road Safety Research with R](https://itsleeds.github.io/rrsrr/introduction.html#installing-r-and-rstudio) for instructions on how to install key packages we will use in the module.^[ +For further guidance on setting-up your computer to run R and RStudio for spatial data, see these links, we recommend +Chapter 2 of Geocomputation with R (the Prerequisites section contains links for installing spatial software on Mac, Linux and Windows): https://geocompr.robinlovelace.net/spatial-class.html and Chapter 2 of the online book *Efficient R Programming*, particularly sections 2.3 and 2.5, for details on R installation and [set-up](https://csgillespie.github.io/efficientR/set-up.html) and the +[project management section](https://csgillespie.github.io/efficientR/set-up.html#project-management). +] + +### Python software prerequisites + +We recommend installing Python with a modern package manager such as `pixi`. diff --git a/marking-criteria.qmd b/marking-criteria.qmd new file mode 100644 index 0000000..b214b93 --- /dev/null +++ b/marking-criteria.qmd @@ -0,0 +1,73 @@ +--- +title: "Marking Criteria" +format: + html: + theme: cerulean +--- + +## Marks + +Marks are awarded in 4 categories, accounting for the following criteria: + +### Data processing: 20% + +1. The selection and effective use of input datasets that are large (e.g. covering multiple years), complex (e.g. containing multiple variables) and/or diverse (e.g. input datasets from multiple sources are used and where appropriate combined in the analysis) +2. 
Describe how the data was collected and implications for data quality, and outline how the input datasets were downloaded (with a reproducible example if possible), with a description that will allow others to understand the structure of the inputs and how to import them +3. Evidence of data cleaning techniques (e.g. by re-categorising variables) +4. Adding value to datasets with joins (key-based or spatial), creation of new variables (also known as feature engineering) and reshaping data (e.g. from wide to long format) + +**Distinction (70%+):** The report makes use of a complex (with many columns and rows) and/or multiple input datasets, efficiently importing them and adding value by creating new variables, recategorising, changing data formats/types, and/or reshaping the data. Selected datasets are very well suited to the research questions, clearly described, with links to the source and understanding of how the datasets were generated. + +**Merit (60-69%):** The report makes some use of complex or multiple input datasets. The selection, description of, cleaning or value-added to the input datasets show skill and care applied to the data processing stage but with some weaknesses. Selected datasets are appropriate for the research questions, with some description or links to the data source. + +**Pass (50-59%):** There is some evidence of care and attention put into the selection, description of or cleaning of the input datasets but little value has been added. The report makes little use of complex or multiple input datasets. The datasets are not appropriate for the research questions, the datasets are not clearly described, or there are no links to the source or understanding of how the datasets were generated, but the data processing aspect of the work is acceptable. + +**Fail (0-49%):** The report does not make use of appropriate input datasets and contains very little or no evidence of data cleaning, adding value to the datasets or reshaping the data. 
While there may be some evidence of data processing, it is of poor quality and/or not appropriate for the research questions. + +### Visualization and report: 20% + +1. Creation of figures that are readable and well-described (e.g. with captions and description) +1. High quality, attractive or advanced techniques (e.g. multi-layered maps or graphs, facets or other advanced techniques) +1. Using visualisation techniques appropriate to the topic and data and interpreting the results correctly (e.g. mentioning potential confounding factors that could account for observed patterns) +1. The report is well-formatted, accessible (e.g. with legible text size and does not contain excessive code in the submitted report) and clearly communicates the data and analysis visually, with appropriate figure captions, cross-references and a consistent style + +**Distinction (70%+):** The report contains high quality, attractive, advanced and meaningful visualisations that are very well-described and interpreted, showing deep understanding of how visualisation can communicate meaning contained within datasets. The report is very well-formatted, accessible and clearly communicates the data and analysis visually. + +**Merit (60-69%):** The report contains good visualisations that correctly present the data and highlight key patterns. The report has appropriate formatting. + +**Pass (50-59%):** The report contains only basic visualisations, or visualisations that are not well-described or interpreted correctly, or the report is poorly formatted, not accessible or does not clearly communicate the data and analysis visually. + +**Fail (0-49%):** The report is of unacceptable quality (would likely be rejected in a professional setting) and/or has poor quality and/or few visualisations, or the visualisations are inappropriate given the data and research questions. + +### Code quality, efficiency and reproducibility: 20% + +1. 
Code quality in the submitted source code, including using consistent style, appropriate packages, and clear comments +1. Efficiency, including pre-processing to reduce input datasets (avoiding having to share large datasets in the submission for example) and computationally efficient implementations +1. The report is fully reproducible, including generation of figures. There are links to online resources for others wanting to reproduce the analysis for another area, and links to the input data + +**Distinction (70%+):** The source code underlying the report contains high quality, efficient and reproducible code that is very well-written, using consistent syntax and good style, well-commented and uses appropriate packages. The report is fully reproducible, with links to online resources for others wanting to reproduce the analysis for another area, and links to the input data. + +**Merit (60-69%):** The code is readable and describes the outputs in the report but lacks quality, either in terms of comments, efficiency or reproducibility. + +**Pass (50-59%):** The source code underlying the report describes the outputs in the report but is not well-commented, not efficient or has very limited levels of reproducibility, with few links to online resources for others wanting to reproduce the analysis for another area, and few links to the input data. + +**Fail (0-49%):** The report has little to no reproducible, readable or efficient code. A report that includes limited well-described code in the main text or in associated files would be considered at the borderline between a fail and a pass. A report that includes no code would be considered a low fail under this criterion. + +### Understanding the data science process, including choice of topic and impact: 40% + +1. Topic selection, including originality, availability of datasets related to the topic and relevance to solving transport planning problems +1. Clear research question +1. 
Appropriate reference to the academic, policy and/or technical literature and use of the literature to inform the research question and methods +1. Use of appropriate data science methods and techniques +1. Discussion of the strengths and weaknesses of the analysis and input datasets and/or how limitations could be addressed +1. Discuss further research and/or explain the potential impacts of the work +1. The conclusions are supported by the analysis and results +1. The contents of the report fit together logically and support the aims and/or research questions of the report + +**Distinction (70%+):** The report contains a clear research question, appropriate reference to the academic, policy and/or technical literature, use of appropriate data science methods and techniques, discussion of the strengths and weaknesses of the analysis and input datasets and/or how limitations could be addressed. The report discusses further research and/or explores the potential impacts of the work. Conclusions are supported by the analysis and results, and the contents of the report fit together logically as a cohesive whole that has a clear direction set-out by the aims and/or research questions. To get a Distinction there should also be evidence of considering the generalisability of the methods and reflections on how it could be built on by others in other areas. + +**Merit (60-69%):** There is a clear research question. There is some reference to the academic, policy and/or technical literature. The report has a good structure and the results are supported by the analysis. There is some discussion of the strengths and weaknesses of the analysis and input datasets and/or how limitations could be addressed. + +**Pass (50-59%):** The report contains a valid research question but only limited references to appropriate literature or justification. 
There is evidence of awareness of the limitations of the results and how they inform conclusions, but these are not fully supported by the analysis. The report has a reasonable structure but does not fit together well in a cohesive whole. + +**Fail (0-49%):** The report does not contain a valid research question, has no references to appropriate literature or justification, does not discuss the limitations of the results or how they inform conclusions, or the report does not have a reasonable structure. diff --git a/materials.qmd b/materials.qmd new file mode 100644 index 0000000..c2df7c2 --- /dev/null +++ b/materials.qmd @@ -0,0 +1,16 @@ +--- +title: "Course Materials" +--- + +- [Practical 1](p1) + - [Slides](p1/slides.html) +- [Practical 2](p2) +- [Practical 3](p3) +- [Practical 4](p4) +- [Practical 5](p5) +- [Practical 6](p6) + +- [Datasets](data/) +- [Slides](slides/) (those not included in the practicals) + + diff --git a/messages/2-pager.md b/messages/2-pager.md deleted file mode 100644 index 4e3bc55..0000000 --- a/messages/2-pager.md +++ /dev/null @@ -1,29 +0,0 @@ - - -Dear All, - -A couple of you asked about submitting the 2 page report before the -deadline of 22nd April and I wanted to provide a response to everyone. - -Good news: early submissions are very welcome. We can start providing -feedback as soon as reports are submitted, meaning you will get more -valuable time to act on the feedback if you submit before. The -[submission box is now open on -Minerva](https://minerva.leeds.ac.uk/ultra/courses/_551386_1/outline/assessment/test/_11896344_1?courseId=_551386_1&gradeitemView=details). 
- -For guidance on what you should submit, see: - -- [Guidance on what you should include: - https://github.com/ITSLeeds/TDS/blob/master/coursework-template.md#01-two-pager](https://github.com/ITSLeeds/TDS/blob/master/coursework-template.md#01-two-pager) - -- See - [here](https://github.com/ITSLeeds/TDS/releases/download/24/2-pager.zip) - an example of how to upload a .zip file with the .qmd document and the - pdf that it produces. Note: you can download and unzip this folder and - reproduce the .qmd document, see the pdf for examples of how to add - citations. - -- See the source code of the example - [here](https://github.com/ITSLeeds/TDS/blob/master/demos/2-pager/2-pager.qmd) - -Please let me know if you have any questions. diff --git a/messages/2-pager.qmd b/messages/2-pager.qmd deleted file mode 100644 index a25dc0e..0000000 --- a/messages/2-pager.qmd +++ /dev/null @@ -1,21 +0,0 @@ ---- -format: gfm ---- - -Dear All, - -A couple of you asked about submitting the 2 page report before the deadline of 22nd April and I wanted to provide a response to everyone. - -Good news: early submissions are very welcome. -We can start providing feedback as soon as reports are submitted, meaning you will get more valuable time to act on the feedback if you submit before. -The [submission box is now open on Minerva](https://minerva.leeds.ac.uk/ultra/courses/_551386_1/outline/assessment/test/_11896344_1?courseId=_551386_1&gradeitemView=details). - -For guidance on what you should submit, see: - -- [Guidance on what you should include: https://github.com/ITSLeeds/TDS/blob/master/coursework-template.md#01-two-pager](https://github.com/ITSLeeds/TDS/blob/master/coursework-template.md#01-two-pager) - -- See [here](https://github.com/ITSLeeds/TDS/releases/download/24/2-pager.zip) an example of how to upload a .zip file with the .qmd document and the pdf that it produces. 
- Note: you can download and unzip this folder and reproduce the .qmd document, see the pdf for examples of how to add citations. -- See the source code of the example [here](https://github.com/ITSLeeds/TDS/blob/master/demos/2-pager/2-pager.qmd) - -Please let me know if you have any questions. \ No newline at end of file diff --git a/messages/2021/README.md b/messages/2021/README.md deleted file mode 100644 index b8337a6..0000000 --- a/messages/2021/README.md +++ /dev/null @@ -1 +0,0 @@ -Place to store messages on the course. \ No newline at end of file diff --git a/messages/2021/bristol-exercise.R b/messages/2021/bristol-exercise.R deleted file mode 100644 index 626be09..0000000 --- a/messages/2021/bristol-exercise.R +++ /dev/null @@ -1,110 +0,0 @@ -# Aim: read-in and analyse medium sized dataset - -remotes::install_github("nowosad/spDataLarge") -library(tidyverse) -library(sf) -library(tmap) - -od = spDataLarge::bristol_od -head(od) -View(od) -class(od) - -zones = spDataLarge::bristol_zones -names(zones) - -zones = zones %>% - mutate(local_authority = word(string = name, start = 1)) -zones %>% - slice(1:5) %>% - pull(name) - -plot(zones %>% select(local_authority), key.pos = 1) -zones %>% slice(1:3) - -tmap_mode(mode = "view") -tm_shape(zones) + - # tm_polygons(col = "name") # not working - tm_polygons(col = "local_authority") - -bristol_sf = tmaptools::geocode_OSM("bristol", as.sf = TRUE, return.first.only = T, geometry = "point") -mapview::mapview(bristol_sf) - -bristol_buffer = stplanr::geo_buffer(bristol_sf, dist = 10000) -mapview::mapview(bristol_buffer) -zones_central = zones[bristol_buffer, , op = sf::st_within] -mapview::mapview(zones_central) - -od_central = od %>% - filter(o %in% zones_central$geo_code) %>% - filter(d %in% zones_central$geo_code) -nrow(od_central) / nrow(od) - -library(stplanr) -desire_lines = od2line(od_central, zones_central) - -## Creating centroids representing desire line start and end points. 
- -desire_lines$distance_direct_m = as.numeric(st_length(desire_lines)) -desire_lines = desire_lines %>% - mutate(proportion_active = (bicycle + foot) / all) - -# visualise -ggplot(desire_lines) + - geom_point(aes(distance_direct_m, proportion_active)) -ggplot(desire_lines) + - geom_point(aes(distance_direct_m, proportion_active, size = all), alpha = 0.3) - -# model/visualise -m1 = lm(proportion_active ~ - distance_direct_m + I(distance_direct_m^2), - data = desire_lines) -desire_lines = desire_lines %>% - mutate( - new_active_travel = m1$fitted.values * car_driver, - new_total_active = new_active_travel + bicycle + foot, - new_proportion_active = new_total_active / all - ) %>% - arrange(proportion_active) -ggplot(desire_lines) + - geom_point(aes(distance_direct_m, proportion_active, size = all), alpha = 0.3) + - geom_point(aes(distance_direct_m, new_proportion_active, size = all), alpha = 0.3, colour = "blue") - - -l = desire_lines %>% filter(all > 500) %>% - filter(o != d) -r = stplanr::route(l = l, route_fun = route_osrm) -mapview::mapview(r) + - mapview::mapview(l) - - -# get data leeds ---------------------------------------------------------- - -u = "https://github.com/npct/pct-outputs-regional-notR/raw/master/commute/msoa/west-yorkshire/z.geojson" -zones = sf::read_sf(u) -mapview::mapview(zones) - -u_zip = "https://www.nomisweb.co.uk/output/census/2011/wu02ew_msoa.zip" -u_zip = "https://www.nomisweb.co.uk/output/census/2011/wu01ew_msoa.zip" -f_zip = basename(u_zip) -f_zip -download.file(url = u_zip, destfile = f_zip) -od_uk = read_csv(f_zip) -nrow(od_uk) -summary(od_uk) -od_uk_100_plus = od_uk %>% - rename(all = `All categories: Sex`) %>% - filter(all > 100) - -summary(od_uk_100_plus$`Area of residence` %in% zones$geo_code) - -od_uk_100_plus_yorkshire = od_uk_100_plus %>% - filter(`Area of residence` %in% zones$geo_code) %>% - filter(`Area of workplace` %in% zones$geo_code) - -desire_lines = od2line(flow = od_uk_100_plus_yorkshire, zones) 
-mapview::mapview(desire_lines) - -od_female = od_uk_100_plus_yorkshire %>% - filter(Female < Male) - diff --git a/messages/2021/practical-routing-notes.R b/messages/2021/practical-routing-notes.R deleted file mode 100644 index 93dc2b4..0000000 --- a/messages/2021/practical-routing-notes.R +++ /dev/null @@ -1,100 +0,0 @@ -library(sf) -library(tidyverse) -library(stplanr) -library(dodgr) # Local routing and network analysis -library(opentripplanner) # Connect to and use OpenTripPlanner -library(tmap) # Make maps -library(osmextract) # Download and import OpenStreetMap data -tmap_mode("plot") - -ip = "otp.saferactive.org" # an actual server -otpcon = otp_connect(hostname = ip, - port = 80, - router = "west-yorkshire") - -u = "https://github.com/ITSLeeds/TDS/releases/download/0.1/desire_lines.geojson" -desire_lines = read_sf(u) -dim(desire_lines) - -desire_lines = desire_lines %>% - select(geo_code1, geo_code2, all, bicycle, foot, car_driver) -names(desire_lines) - -tmaptools::palette_explorer() - -tm_shape(desire_lines) + - tm_lines(lwd = "all", col = "car_driver", - palette = "-viridis") - - -desire = bind_cols(desire_lines, line2df(desire_lines)) -desire = st_drop_geometry(desire) - -desire_top = slice_max(desire, order_by = all, n = 3) - - -desire_top_origin = as.matrix(desire_top[,c("fx","fy")]) -desire_top_destination = as.matrix(desire_top[,c("tx","ty")]) -routes_top = otp_plan(otpcon, - fromPlace = desire_top_origin, - toPlace = desire_top_destination, - mode = "CAR") - -u = "https://github.com/ITSLeeds/TDS/releases/download/0.20.1/transit_routes.gpkg" -download.file(url = u, destfile = "transit_routes.gpkg", mode = "wb") -u = "https://github.com/ITSLeeds/TDS/releases/download/0.20.1/driving_routes.gpkg" -download.file(url = u, destfile = "driving_routes.gpkg", mode = "wb") - -routes_drive = read_sf("driving_routes.gpkg") -routes_transit = read_sf("transit_routes.gpkg") - -routes_drive = routes_drive %>% - select(fromPlace, toPlace, mode, route_option, 
distance) -routes_transit = routes_transit %>% - select(fromPlace, toPlace, mode, route_option, distance) - -desire_drive = left_join(desire, routes_drive, - by = c("geo_code1" = "fromPlace", - "geo_code2" = "toPlace")) - -desire_drive = st_as_sf(desire_drive) -plot(desire_drive[1, ]) - - -desire_transit = left_join(desire, routes_transit, - by = c("geo_code1" = "fromPlace", - "geo_code2" = "toPlace")) - -desire_transit = st_as_sf(desire_transit) - -desire_drive = desire_drive[!is.na(desire_drive$mode),] -desire_transit = desire_transit[!is.na(desire_transit$mode),] - - -rnet_drive <- overline(desire_drive, "car_driver") - - -roads = oe_get("Isle of Wight", extra_tags = c("maxspeed","oneway")) -roads = roads[!is.na(roads$highway),] -road_types = c("residential","secondary","tertiary", - "unclassified","primary","primary_link", - "secondary_link","tertiary_link") - - -roads = roads[roads$highway %in% road_types, ] -graph = weight_streetnet(roads) - -estimate_centrality_time(graph) -graph = dodgr_contract_graph(graph) -centrality = dodgr_centrality(graph) - -clear_dodgr_cache() -centrality_sf = dodgr_to_sf(centrality) - -tmap_mode("view") -tm_shape(centrality_sf) + - tm_lines("centrality", - lwd = 3, - n = 8, - style = "fisher", - palette = "-viridis") diff --git a/messages/2021/practical1-homework.Rmd b/messages/2021/practical1-homework.Rmd deleted file mode 100644 index b37ad47..0000000 --- a/messages/2021/practical1-homework.Rmd +++ /dev/null @@ -1,84 +0,0 @@ ---- -bibliography: ../tds.bib -output: github_document ---- - - - -```{r, include=FALSE} -library(tidyverse) -knitr::opts_chunk$set(warning = FALSE, message = FALSE) -``` - - -Hi all, - -Thank you for paying close attention and working hard during the practical session yesterday. 
- -It may feel like being [thrown in at the deep end](https://www.collinsdictionary.com/dictionary/english/throw-someone-in-at-the-deep-end) but luckily you have a life jacket: -many outstanding tutorials that will help your learning journey. -You cannot learn a new language in 1 week, let alone 1 practical session. -But you all can, and I believe will, gain proficiency with R and improve your practical data science skills over the next three months, that is a core aim of the module. - -You have homework before the next practical session to help your journey. -I estimate that these require a minimum of ~3hr but you may benefit more from the tasks if you spend more time on them. - -## Homework 1: contribute on GitHub (~30 minutes) - -Starting with the quickest piece of homework, please add the coordinates where you are currently, the n. cups of coffee you drink per week and favourite mode of transport for fun (you can invent the data, this is not a test!) to this file: https://github.com/ITSLeeds/TDS/blob/master/messages/locations.csv - -I have added unique initials for each student and extra 'n_coffee' and 'mode' columns. -You can add your location to 0 or 1 decimal places. 
-To edit the file you must log-in to GitHub and then click on the pencil shaped button in the web page above (see [here for details](https://docs.github.com/en/github/managing-files-in-a-repository/editing-files-in-your-repository)), which should take you to this link: - -https://github.com/ITSLeeds/TDS/edit/master/messages/locations.csv - -As an optional bonus, you can plot the data using your favourite mapping package (suggestions, `ggplot2`, `tmap`), starting with the following commands (try running each line interactively to help understand what the code does): - -```{r} -library(tidyverse) -library(tmap) -tmap_mode("view") -url_locations = "https://github.com/ITSLeeds/TDS/raw/master/messages/locations.csv" -locations = read_csv(url_locations) -locations_sf = locations %>% - filter(!is.na(lon) & !is.na(lat)) %>% - sf::st_as_sf(coords = c("lon", "lat")) -tm_shape(locations_sf) + - tm_dots("name") -``` - -Bonus 2: make different visualisations of the data - you will have an opportunity to share you map with the class next Thursday. - -## Work through practical and the example with Bristol (~60 minutes) - -Ensure that you can run and understand all of the code in the 'Bristol' section of the practical: https://github.com/ITSLeeds/TDS/blob/master/practicals/2-software.md#processing-origin-destination-data-in-bristol - -You may want to go back to the previous section of the practical to ensure you understand the concepts. 
- -Type in the code in that section (I strong discourage copy-pasting code here because you learn 'muscle memory' for coding by typing the commands) with reference to Chapter 12 of Geocomputation with R, that explains the data in conceptual terms: https://geocompr.robinlovelace.net/transport.html - -Bonus: also work through the 'processing medium sized data' section of the practical and read Chapter 5 of the R for Data Science book [@grolemund_r_2016]: https://github.com/ITSLeeds/TDS/blob/master/practicals/2-software.md#processing-medium-sized-data-and-basic-visualisation-30-minutes - -## Think about research questions and data for the coursework (~30 minutes) - -You will be assessed based on a 10 page pdf report. -You do not need to start writing the report. -But before the next lecture it will help if you have thought about transport datasets and research questions you would like to answer using data science methods. - -Bonus: create a sketch or schematic diagram illustrating the research question you are interested in. - -Creativity is an important part of the data science process so it may help to start with a 'blank slate'. -You can draw your ideas down on paper on using a computer application. -For inspiration, take a look at this sketch of a web application for visualising road safety data: - -https://excalidraw.com/#room=bf5460ef09ebc3cdadef,si88ngJ5WOtODVqqjqVu3Q - -## Watch and make notes on the next lecture (~60 minutes) - -I will send you the next lecture on Monday. -Be ready to watch it, make notes, and look-up concepts and references from it. - - -# References - diff --git a/messages/2021/practical1-homework.md b/messages/2021/practical1-homework.md deleted file mode 100644 index d76481d..0000000 --- a/messages/2021/practical1-homework.md +++ /dev/null @@ -1,114 +0,0 @@ - - - -Hi all, - -Thank you for paying close attention and working hard during the -practical session yesterday. 
- -It may feel like being [thrown in at the deep -end](https://www.collinsdictionary.com/dictionary/english/throw-someone-in-at-the-deep-end) -but luckily you have a life jacket: many outstanding tutorials that will -help your learning journey. You cannot learn a new language in 1 week, -let alone 1 practical session. But you all can, and I believe will, gain -proficiency with R and improve your practical data science skills over -the next three months, that is a core aim of the module. - -You have homework before the next practical session to help your -journey. I estimate that these require a minimum of \~3hr but you may -benefit more from the tasks if you spend more time on them. - -## Homework 1: contribute on GitHub (\~30 minutes) - -Starting with the quickest piece of homework, please add the coordinates -where you are currently, the n. cups of coffee you drink per week and -favourite mode of transport for fun (you can invent the data, this is -not a test!) to this file: - - -I have added unique initials for each student and extra ‘n\_coffee’ and -‘mode’ columns. You can add your location to 0 or 1 decimal places. 
To -edit the file you must log-in to GitHub and then click on the pencil -shaped button in the web page above (see [here for -details](https://docs.github.com/en/github/managing-files-in-a-repository/editing-files-in-your-repository)), -which should take you to this link: - - - -As an optional bonus, you can plot the data using your favourite mapping -package (suggestions, `ggplot2`, `tmap`), starting with the following -commands (try running each line interactively to help understand what -the code does): - -``` r -library(tidyverse) -library(tmap) -tmap_mode("view") -url_locations = "https://github.com/ITSLeeds/TDS/raw/master/messages/locations.csv" -locations = read_csv(url_locations) -locations_sf = locations %>% - filter(!is.na(lon) & !is.na(lat)) %>% - sf::st_as_sf(coords = c("lon", "lat")) -tm_shape(locations_sf) + - tm_dots("name") -``` - -![](practical1-homework_files/figure-gfm/unnamed-chunk-2-1.png) - -Bonus 2: make different visualisations of the data - you will have an -opportunity to share you map with the class next Thursday. - -## Work through practical and the example with Bristol (\~60 minutes) - -Ensure that you can run and understand all of the code in the ‘Bristol’ -section of the practical: - - -You may want to go back to the previous section of the practical to -ensure you understand the concepts. - -Type in the code in that section (I strong discourage copy-pasting code -here because you learn ‘muscle memory’ for coding by typing the -commands) with reference to Chapter 12 of Geocomputation with R, that -explains the data in conceptual terms: - - -Bonus: also work through the ‘processing medium sized data’ section of -the practical and read Chapter 5 of the R for Data Science book -(Grolemund and Wickham 2016): - - -## Think about research questions and data for the coursework (\~30 minutes) - -You will be assessed based on a 10 page pdf report. You do not need to -start writing the report. 
But before the next lecture it will help if -you have thought about transport datasets and research questions you -would like to answer using data science methods. - -Bonus: create a sketch or schematic diagram illustrating the research -question you are interested in. - -Creativity is an important part of the data science process so it may -help to start with a ‘blank slate.’ You can draw your ideas down on -paper on using a computer application. For inspiration, take a look at -this sketch of a web application for visualising road safety data: - - - -## Watch and make notes on the next lecture (\~60 minutes) - -I will send you the next lecture on Monday. Be ready to watch it, make -notes, and look-up concepts and references from it. - -# References - -
- -
- -Grolemund, Garrett, and Hadley Wickham. 2016. *R for Data Science*. -O’Reilly Media. - -
- -
diff --git a/messages/2021/practical1-notes.R b/messages/2021/practical1-notes.R deleted file mode 100644 index 44d7067..0000000 --- a/messages/2021/practical1-notes.R +++ /dev/null @@ -1,57 +0,0 @@ -x = 1:9 -x = c(x, 1.1) -y = sqrt(x = x) - -class(x) -class(y) - -z = paste0(1:10, "z") -class(z) -d = data.frame(x = x, y = y, z) -View(d) -class(d) - -d[3, c(2, 3)] - -library(tidyverse) -d %>% - filter(x == 3) -# identical to: -filter(d, x == 3) -?filter - -plot(d) -plot(d$x, d$y) - -od_data = stplanr::od_data_sample - -od_data_walk = od_data %>% - rename(walk = foot) %>% - filter(walk > 0) %>% - select(geo_code1, geo_code2, walk, car_driver, all) %>% - mutate( - proportion_walk = walk / all, - proportion_drive = car_driver / all - ) - -plot(od_data_walk$car_driver, od_data_walk$walk) - -m1 = lm(proportion_walk ~ proportion_drive, data = od_data_walk) -m1 -summary(m1) -plot(od_data_walk$proportion_drive, od_data_walk$proportion_walk) -points(od_data_walk$proportion_drive, m1$fitted.values, col = "red") - -od_data_walk$walk_predicted = m1$fitted.values -ggplot(od_data_walk) + - geom_point(aes(proportion_drive, proportion_walk, size = all)) + - geom_line(aes(proportion_drive, walk_predicted)) - -zones = stplanr::zones_sf -zones$geometry - -desire_lines = stplanr::od2line(od_data_walk, zones = zones) -mapview::mapview(desire_lines) - -ggplot(data = desire_lines) + - geom_sf(aes(colour = walk_predicted)) diff --git a/messages/2021/practical2-homework.Rmd b/messages/2021/practical2-homework.Rmd deleted file mode 100644 index c56f81b..0000000 --- a/messages/2021/practical2-homework.Rmd +++ /dev/null @@ -1,166 +0,0 @@ ---- -bibliography: ../tds.bib -output: github_document ---- - - - -```{r, include=FALSE} -library(tidyverse) -knitr::opts_chunk$set(warning = FALSE, message = FALSE) -``` - - -Hi all, - -Following the practical session yesterday I have some homework and links and for you. 
- -## 1) Think of topics/datasets that you would like to work on/with for the coursework report that you will submit in May. -Below are some options: - -### Topics - -- Data collection and analysis - - Analysis of a large transport dataset, e.g. https://www.nature.com/articles/sdata201889 - -- Infrastructure and travel behaviour - - What are the relationships between specific types of infrastructure and travel, e.g. between fast roads and walking? - - How do official sources of infrastructure data (e.g. the [CID](https://github.com/PublicHealthDataGeek/CycleInfraLnd/)) compare with crowd-sourced datasets such as OpenStreetMap (which can be accessed with the new [`osmextract` R package](https://github.com/ropensci/osmextract)) - - Machine learning and image recognition to understand transport infrastructure - see https://telraam.net/ for example - -- Changing transport systems - - Modelling change in transport systems, e.g. by comparing before/after data for different countries/cities, which countries had the hardest lockdowns and where have changes been longer term? - see here for open data: https://github.com/ActiveConclusion/COVID19_mobility - - How have movement patterns changed during the Coronavirus pandemic and what impact is that likely to have long term (see [here](https://saferactive.github.io/trafficalmr/articles/report3.html) for some graphics on this) - -- Software development - - Creating a package to make a particular data source more accessible, see https://github.com/ropensci/stats19 for an example - - Integration between R and A/B Street - see https://github.com/a-b-street/abstr - -- Road safety - how can we makes roads and transport systems in general safer? 
- -- Other - - Other topics are welcome - -### Datasets - -These do not need to be used in isolation but choosing a main dataset should help - -- STATS19 road crash data (other countries have other datasets) -- 'PCT' data from UK travel behaviour -- OpenStreetMap data (global, you will need to think of a subset by area/type) -- Open data from a single city, e.g. Seattle: https://data-seattlecitygis.opendata.arcgis.com/ -- See here: https://github.com/awesomedata/awesome-public-datasets#transportation -- And here: https://github.com/CUTR-at-USF/awesome-transit - -### Start work on 1 page document outlining your ideas - -I have set a deadline of Friday 26th February for you to submit a 1-2 page pdf document and .Rmd file. -This does not need to be the final dissertation topic but will give you a chance to get feedback. -The document should contain: - -- Topics considered -- Datasets available -- Missing elements/skills/risks -- Questions that will help decide the direction of the final coursework - - E.g. do you know of a package that can help with this? - - Is using this dataset a good idea? 
- - - -## 2) Working with RMarkdown - -Take a look at the info + video on RMarkdown here (they describe the benefits of the format better than me!): https://rmarkdown.rstudio.com/lesson-1.html - -Try to reproduce the .Rmd file I have provided - download this and try to knit it and reproduce the example data analysis: https://github.com/ITSLeeds/TDS/raw/master/coursework-template.Rmd - -## 3) Reading-up on R + Git - -If you would like to learn more about geographic data in R, I suggest taking a look at the first 5 chapters of Geocomputation with R: https://geocompr.robinlovelace.net/ - -If you feel you need practice on R basics, revisit this workbook and start at the beginning - it should answer many questions: https://itsleeds.github.io/rrsrr/ - -If you like learning from Videos, I recommend checking out the links + video by Tom Mock here: https://education.rstudio.com/learn/beginner/ - -If you're interested in what's coming next week, check this video: https://github.com/a-b-street/abstreet#ab-street - -And for everyone wanting more info on using Git/GitHub I highly recommend watching this video: https://rstudio.com/resources/rstudioconf-2017/happy-git-and-gihub-for-the-user-tutorial/ - -See here for a good introduction: https://happygitwithr.com/ - -## 4) Install key packages, download data and watch the 'Routing' video by Malcolm - -Following feedback from students it seems it would be useful to get a list of all packages used in the TDS module. -I have separated these between 'core' and 'extra' packages - you will need the core packages for sure and should already have them installed. 
- -### Core packages - -```{r, echo=FALSE, eval=FALSE} -# find deps in project -deps = renv::dependencies() -names(deps) -unique(deps$Package) -``` - - -Run the following commands to check you have them: - -```{r, eval=FALSE} -install.packages("remotes") -``` - -```{r, eval=FALSE} -install.packages("remotes") -pkgs = c( - "pct", # package for getting travel data in the UK - "sf", # spatial data package - "stats19", # downloads and formats open stats19 crash data - "stplanr", # for working with origin-destination and route data - "tidyverse", # a package for user friendly data science - "tmap", # for making maps - "opentripplanner", # routing - "mapview", # mapping - "osmextract", # get osm data - "dodgr", - "nycflights13" -) -remotes::install_cran(pkgs) -``` - -Don't forget, every time you open a new R session you need to load the packages you are going to use, for example: - -``` r -library(sf) -library(tidyverse) -library(pct) -``` - -### Extra packages - -These extra packages may be useful. These are 'development versions' of packages. This means the packages are in the process of being development (for example, a new version of an existing package). These development versions are hosted on github rather than CRAN. CRAN stands for 'The Comprehensive R Archive Network'. It is an online repository for packages where they go through regular testing https://cran.r-project.org/ . 
- -``` r -# CRAN versions -install.packages("spData") - -# development versions -remotes::install_github("ITSLeeds/pct") -remotes::install_github("ITSLeeds/od") -remotes::install_github("a-b-street/abstr") -``` - -### Automated script - -You can check your set-up with this script: - -```{r, eval=FALSE} -source("https://git.io/JvGjF") -``` - - -### Download data - -Follow instructions here to download the A/B Street data for the seminar: - - https://a-b-street.github.io/docs/howto/index.html - - diff --git a/messages/2021/practical2-homework.md b/messages/2021/practical2-homework.md deleted file mode 100644 index 8c1cd8c..0000000 --- a/messages/2021/practical2-homework.md +++ /dev/null @@ -1,198 +0,0 @@ - - - -Hi all, - -Following the practical session yesterday I have some homework and links -and for you. - -## 1) Think of topics/datasets that you would like to work on/with for the coursework report that you will submit in May. - -Below are some options: - -### Topics - -- Data collection and analysis - - - Analysis of a large transport dataset, - e.g.  - -- Infrastructure and travel behaviour - - - What are the relationships between specific types of - infrastructure and travel, e.g. between fast roads and walking? - - How do official sources of infrastructure data (e.g. the - [CID](https://github.com/PublicHealthDataGeek/CycleInfraLnd/)) - compare with crowd-sourced datasets such as OpenStreetMap (which - can be accessed with the new [`osmextract` R - package](https://github.com/ropensci/osmextract)) - - Machine learning and image recognition to understand transport - infrastructure - see for example - -- Changing transport systems - - - Modelling change in transport systems, e.g. by comparing - before/after data for different countries/cities, which - countries had the hardest lockdowns and where have changes been - longer term? 
- see here for open data: - - - How have movement patterns changed during the Coronavirus - pandemic and what impact is that likely to have long term (see - [here](https://saferactive.github.io/trafficalmr/articles/report3.html) - for some graphics on this) - -- Software development - - - Creating a package to make a particular data source more - accessible, see for an - example - - Integration between R and A/B Street - see - - -- Road safety - how can we makes roads and transport systems in - general safer? - -- Other - - - Other topics are welcome - -### Datasets - -These do not need to be used in isolation but choosing a main dataset -should help - -- STATS19 road crash data (other countries have other datasets) -- ‘PCT’ data from UK travel behaviour -- OpenStreetMap data (global, you will need to think of a subset by - area/type) -- Open data from a single city, e.g. Seattle: - -- See here: - -- And here: - -### Start work on 1 page document outlining your ideas - -I have set a deadline of Friday 26th February for you to submit a 1-2 -page pdf document and .Rmd file. This does not need to be the final -dissertation topic but will give you a chance to get feedback. The -document should contain: - -- Topics considered -- Datasets available -- Missing elements/skills/risks -- Questions that will help decide the direction of the final - coursework - - E.g. do you know of a package that can help with this? - - Is using this dataset a good idea? 
- - - -## 2) Working with RMarkdown - -Take a look at the info + video on RMarkdown here (they describe the -benefits of the format better than me!): - - -Try to reproduce the .Rmd file I have provided - download this and try -to knit it and reproduce the example data analysis: - - -## 3) Reading-up on R + Git - -If you would like to learn more about geographic data in R, I suggest -taking a look at the first 5 chapters of Geocomputation with R: - - -If you feel you need practice on R basics, revisit this workbook and -start at the beginning - it should answer many questions: - - -If you like learning from Videos, I recommend checking out the links + -video by Tom Mock here: - -If you’re interested in what’s coming next week, check this video: - - -And for everyone wanting more info on using Git/GitHub I highly -recommend watching this video: - - -See here for a good introduction: - -## 4) Install key packages, download data and watch the ‘Routing’ video by Malcolm - -Following feedback from students it seems it would be useful to get a -list of all packages used in the TDS module. I have separated these -between ‘core’ and ‘extra’ packages - you will need the core packages -for sure and should already have them installed. 
- -### Core packages - -Run the following commands to check you have them: - -``` r -install.packages("remotes") -``` - -``` r -install.packages("remotes") -pkgs = c( - "pct", # package for getting travel data in the UK - "sf", # spatial data package - "stats19", # downloads and formats open stats19 crash data - "stplanr", # for working with origin-destination and route data - "tidyverse", # a package for user friendly data science - "tmap", # for making maps - "opentripplanner", # routing - "mapview", # mapping - "osmextract", # get osm data - "dodgr", - "nycflights13" -) -remotes::install_cran(pkgs) -``` - -Don’t forget, every time you open a new R session you need to load the -packages you are going to use, for example: - -``` r -library(sf) -library(tidyverse) -library(pct) -``` - -### Extra packages - -These extra packages may be useful. These are ‘development versions’ of -packages. This means the packages are in the process of being -development (for example, a new version of an existing package). These -development versions are hosted on github rather than CRAN. CRAN stands -for ‘The Comprehensive R Archive Network.’ It is an online repository -for packages where they go through regular testing - . 
- -``` r -# CRAN versions -install.packages("spData") - -# development versions -remotes::install_github("ITSLeeds/pct") -remotes::install_github("ITSLeeds/od") -remotes::install_github("a-b-street/abstr") -``` - -### Automated script - -You can check your set-up with this script: - -``` r -source("https://git.io/JvGjF") -``` - -### Download data - -Follow instructions here to download the A/B Street data for the -seminar: - - diff --git a/messages/2021/practical4-demo.R b/messages/2021/practical4-demo.R deleted file mode 100644 index 2b9ae8e..0000000 --- a/messages/2021/practical4-demo.R +++ /dev/null @@ -1,103 +0,0 @@ -# Visualisation: basic code from practical 4 - -library(tidyverse) - -crashes_gb = stats19::get_stats19(year = 2019) - -names(crashes_gb) -ggplot(crashes_gb) + - geom_bar(aes(accident_severity)) - -class(crashes_gb$speed_limit) -crashes_gb$speed_limit = as.character(crashes_gb$speed_limit) -class(crashes_gb$speed_limit) -ggplot(crashes_gb, aes(speed_limit)) + - geom_bar(aes(fill = accident_severity), position = "fill") - -# example of global settings -ggplot(crashes_gb, aes(speed_limit)) + - geom_bar(aes(fill = accident_severity), position = "fill", alpha = 0.3) - -# manual colour palette -ggplot(crashes_gb, aes(speed_limit)) + - geom_bar(aes(fill = accident_severity), position = "fill") + - scale_fill_manual(values = c("red", "yellow", "blue")) - -ggplot(crashes_gb, aes(speed_limit)) + - geom_bar(aes(fill = accident_severity), position = "fill") + - scale_fill_brewer(palette = "Reds") + - facet_grid(~ speed_limit) - - -ggplot(crashes_gb, aes(speed_limit)) + - geom_bar(aes(fill = accident_severity), position = "fill") - - -ggplot(crashes_gb, aes(speed_limit)) + - geom_bar(aes(fill = accident_severity), position = "fill") + - scale_y_continuous(labels = scales::percent, name = "Percent") - - -# show layers -crashes_gb_fatal = crashes_gb %>% - filter(accident_severity == "Fatal") -nrow(crashes_gb_fatal) / nrow(crashes_gb) - -ggplot(crashes_gb) + - 
geom_point(aes(x = date, y = time), alpha = 0.1) + - geom_point(aes(x = date, y = time), - alpha = 0.1, data = crashes_gb_fatal, colour = "red") - -b = c("07:00", "09:00", "12:00", "17:00", "19:00") -ggplot(crashes_gb) + - geom_point(aes(datetime, time), alpha = 0.01) + - geom_point(aes(datetime, time), alpha = 0.1, data = crashes_gb_fatal, colour = "red") + - scale_y_discrete(breaks = b) - -# Demonstrate geographic data visualisation techniques - -# get some data -# searching internet... -u = "https://npttile.vs.mythic-beasts.com/npct-data/pct-outputs-regional-notR/commute/msoa/isle-of-wight/z.geojson" -zones = sf::read_sf(u) - -# basic visualisation -plot(zones) - -library(tmap) -tm_shape(zones) + - tm_polygons() - -# driving -names(zones)[1:15] -tm_shape(zones) + - tm_polygons(col = "car_driver") - -View(zones) -summary(zones$all) - -# Saving a plot -map1 = tm_shape(zones) + - tm_polygons(col = "car_driver") -tmap_save(tm = map1, filename = "isle-of-wight-drive.png") - -# boundaries -bounding_box = sf::st_bbox(zones) -bounding_box - -# multiple layers -pct::pct_regions$region_name -routes = pct::get_pct_routes_fast(region = "isle-of-wight") - -tm_shape(zones) + - tm_polygons(col = "car_driver") + - tm_shape(routes) + - tm_lines(lwd = "all") - -tmap_mode("view") - -tm_shape(zones) + - tm_polygons(col = "car_driver") + - tm_shape(routes) + - tm_lines(lwd = "all") - diff --git a/messages/2021/practical4.Rmd b/messages/2021/practical4.Rmd deleted file mode 100644 index 5f42356..0000000 --- a/messages/2021/practical4.Rmd +++ /dev/null @@ -1,42 +0,0 @@ ---- -bibliography: ../tds.bib -output: github_document ---- - - - -```{r, include=FALSE} -library(tidyverse) -knitr::opts_chunk$set(warning = FALSE, message = FALSE) -``` - -# Session plan for Practical 4 - -This is will be the 4th and penultimate practical session in the TDS module. -The final practical session on coursework will be **next Friday 26th**. 
- -The session will be structured as follows: - -- Demonstration of visualisation techniques and modelling: 30 minutes (30 min) - -- Working through the questions in the visualisation practical at https://github.com/ITSLeeds/TDS/blob/master/practicals/7-viz.md (25 min) - -- 5 minute break - -- Breakout rooms on specific topics, 30 minutes each in any 2 of the following, in ascending order of difficulty: - - - Basic R questions and skills - - Do you have general R questions like "Why can't I read-in this dataset"? How to do I clean this dataset? This is the room for asking general R questions. - - Using RMarkdown for your coursework - - In this room, which I recommend everyone visits unless you are confident with your topic and RMarkdown, you will see how to make RMarkdown documents and get a chance to present your own RMarkdown document, a vital step towards submitting the coursework - - Visualisation - - Further support on data visualisation - - Modelling - - Techniques for modelling transport data - -# Homework - -Before the session on Thursday you must create a small RMarkdown document that you can convert to PDF, either by printing to PDF in a web browser such as Chrome or by using output: pdf_document as outlined here: https://bookdown.org/yihui/rmarkdown/pdf-document.html - -Bonus: try to reproduce the coursework template by copying this text into a file called test.Rmd and knitting it: https://raw.githubusercontent.com/ITSLeeds/TDS/master/coursework-template.Rmd - diff --git a/messages/2021/practical4.md b/messages/2021/practical4.md deleted file mode 100644 index 3fcc2f3..0000000 --- a/messages/2021/practical4.md +++ /dev/null @@ -1,48 +0,0 @@ - - - -# Session plan for Practical 4 - -This is will be the 4th and penultimate practical session in the TDS -module. The final practical session on coursework will be **next Friday -26th**. 
- -The session will be structured as follows: - -- Demonstration of visualisation techniques and modelling: 30 minutes - (30 min) - -- Working through the questions in the visualisation practical at - - (25 min) - -- 5 minute break - -- Breakout rooms on specific topics, 30 minutes each in any 2 of the - following, in ascending order of difficulty: - - - Basic R questions and skills - - Do you have general R questions like “Why can’t I read-in - this dataset?” How to do I clean this dataset? This is the - room for asking general R questions. - - Using RMarkdown for your coursework - - In this room, which I recommend everyone visits unless you - are confident with your topic and RMarkdown, you will see - how to make RMarkdown documents and get a chance to present - your own RMarkdown document, a vital step towards submitting - the coursework - - Visualisation - - Further support on data visualisation - - Modelling - - Techniques for modelling transport data - -# Homework - -Before the session on Thursday you must create a small RMarkdown -document that you can convert to PDF, either by printing to PDF in a web -browser such as Chrome or by using output: pdf\_document as outlined -here: - -Bonus: try to reproduce the coursework template by copying this text -into a file called test.Rmd and knitting it: - diff --git a/messages/2021/practical5.Rmd b/messages/2021/practical5.Rmd deleted file mode 100644 index be97244..0000000 --- a/messages/2021/practical5.Rmd +++ /dev/null @@ -1,54 +0,0 @@ ---- -bibliography: ../tds.bib -output: github_document ---- - - - -```{r, include=FALSE} -library(tidyverse) -knitr::opts_chunk$set(warning = FALSE, message = FALSE) -``` - -# Session plan for Practical 5 - -The main purpose of this session is to support you on the project submission on May 14th. 
-See here for a reminder of the mark scheme: https://github.com/ITSLeeds/TDS/blob/master/coursework-template.md#marks - -See here for an example of good practice submission that we will talk about: https://leeds365.sharepoint.com/sites/msteams_cbf52a/Shared%20Documents/General/stats19-example.zip - - -This is will be the 5th and final practical session. It will be structured as follows: - -- Demonstration of tips and tricks when using RMarkdown, questions, and working on the '1 pager' to be submitted on Monday (1 hour) - - screen share and questions will be welcome - - based on this example of a good submission https://leeds365.sharepoint.com/sites/msteams_cbf52a/Shared%20Documents/General/stats19-example.zip - - working through reproduce document - - **plagiarism not tolerated** - - quality over quantity - -Examples of RMarkdown documents, see: https://bookdown.org/yihui/rmarkdown/ - -Adding figures, see: https://bookdown.org/yihui/rmarkdown/r-code.html#figures - -Try opening up Joey's example RMarkdown file: https://github.com/ITSLeeds/TDS/blob/master/practicals/rmarkdown-demo.Rmd - -We will work on this together. - -```{r, echo=FALSE, eval=FALSE} -file.edit("practicals/rmarkdown-demo.Rmd") -``` - -Open the example of a good submission from the .zip file. - - -- Breakout rooms in which everyone will have a chance to talk about their dissertation ideas (1 hour) - - 1 minute each outlining the topic and follow-up questions (10 minutes) - - What are the strongest aspects of your coursework idea so far? What are the weakest? (20 minutes discussions and questions) - - Working through each of these questions and sketching the answers (10 minutes working alone, 20 minutes discussions and question): - - What are the priorities over the next three weeks (break it down into 3 parts) - - What do you need to find more literature on? - - What do you need more data on (you should have all the data already)? - - What additional skills do you need (now is a good time to ask)? 
- -- Discussion of options as a group, working on the '1 pager' and sharing and links to further resources (30 minutes) diff --git a/messages/2021/practical5.md b/messages/2021/practical5.md deleted file mode 100644 index 05f94a1..0000000 --- a/messages/2021/practical5.md +++ /dev/null @@ -1,56 +0,0 @@ - - - -# Session plan for Practical 5 - -The main purpose of this session is to support you on the project -submission on May 14th. See here for a reminder of the mark scheme: - - -See here for an example of good practice submission that we will talk -about: - - -This is will be the 5th and final practical session. It will be -structured as follows: - -- Demonstration of tips and tricks when using RMarkdown, questions, - and working on the ‘1 pager’ to be submitted on Monday (1 hour) - - screen share and questions will be welcome - - based on this example of a good submission - - - working through reproduce document - - **plagiarism not tolerated** - - quality over quantity - -Examples of RMarkdown documents, see: - - -Adding figures, see: - - -Try opening up Joey’s example RMarkdown file: - - -We will work on this together. - -Open the example of a good submission from the .zip file. - -- Breakout rooms in which everyone will have a chance to talk about - their dissertation ideas (1 hour) - - 1 minute each outlining the topic and follow-up questions (10 - minutes) - - What are the strongest aspects of your coursework idea so far? - What are the weakest? (20 minutes discussions and questions) - - Working through each of these questions and sketching the - answers (10 minutes working alone, 20 minutes discussions and - question): - - What are the priorities over the next three weeks (break it - down into 3 parts) - - What do you need to find more literature on? - - What do you need more data on (you should have all the data - already)? - - What additional skills do you need (now is a good time to - ask)? 
-- Discussion of options as a group, working on the ‘1 pager’ and - sharing and links to further resources (30 minutes) diff --git a/messages/2021/practical6.Rmd b/messages/2021/practical6.Rmd deleted file mode 100644 index e24b5b7..0000000 --- a/messages/2021/practical6.Rmd +++ /dev/null @@ -1,27 +0,0 @@ ---- -bibliography: ../tds.bib -output: github_document ---- - - - -```{r, include=FALSE} -library(tidyverse) -knitr::opts_chunk$set(warning = FALSE, message = FALSE) -``` - -# Session plan for Practical 6 - -Like the previous practical session, the aim of this informal session is to support you on the project submission on May 14th. - -You will have the opportunity to ask specific questions about the coursework in breakout rooms. - -During the first part of the project you will be asked to provide a 2 minute update on your coursework, data you are using and any challenges/questions. - -The rooms will be: - -- Me in the main room -- Caroline in Room 1 -- Martyna in Room 2 - -I recommend preparing questions before the practical session. \ No newline at end of file diff --git a/messages/2021/practical6.md b/messages/2021/practical6.md deleted file mode 100644 index f3de49f..0000000 --- a/messages/2021/practical6.md +++ /dev/null @@ -1,22 +0,0 @@ - - - -# Session plan for Practical 6 - -Like the previous practical session, the aim of this informal session is -to support you on the project submission on May 14th. - -You will have the opportunity to ask specific questions about the -coursework in breakout rooms. - -During the first part of the project you will be asked to provide a 2 -minute update on your coursework, data you are using and any -challenges/questions. - -The rooms will be: - -- Me in the main room -- Caroline in Room 1 -- Martyna in Room 2 - -I recommend preparing questions before the practical session. 
diff --git a/messages/2021/r-experience-form.R b/messages/2021/r-experience-form.R deleted file mode 100644 index e69de29..0000000 diff --git a/messages/2021/seminar1-notes.R b/messages/2021/seminar1-notes.R deleted file mode 100644 index d0432e3..0000000 --- a/messages/2021/seminar1-notes.R +++ /dev/null @@ -1,193 +0,0 @@ - -remotes::install_github("a-b-street/abstr") - - - -library(abstr) -library(tidyverse) -library(tmap) -tmap_mode("view") - -leeds_desire_lines -class(leeds_desire_lines) - -tm_shape(leeds_desire_lines) + - tm_lines() + - qtm(leeds_houses) + - qtm(leeds_buildings) + - qtm(leeds_zones) - -ablines_dutch = ab_scenario( - houses = leeds_houses, - buildings = leeds_buildings, - desire_lines = leeds_desire_lines, - zones = leeds_zones, - output_format = "sf" -) - -qtm(ablines_dutch) + - qtm(leeds_buildings) -ablines_dutch_drive = ablines_dutch %>% - filter(mode_base == "Drive") -tm_shape(leeds_buildings, bbox = sf::st_bbox(ablines_dutch_drive)) + tm_polygons() + - tm_shape(leeds_houses) + tm_polygons(col = "blue") + - tm_shape(ablines_dutch_drive) + tm_lines(col = "red") - - -ablines_dutch_drive_routes = stplanr::route( - l = ablines_dutch_drive, - route_fun = stplanr::route_osrm, - osrm.profile = "car" -) -ablines_dutch_drive_routes$n = 1 -ablines_rnet = stplanr::overline(ablines_dutch_drive_routes, "n") -tm_shape(ablines_rnet) + tm_lines(lwd = "n", scale = 9) - -mean(ablines_dutch_drive_routes$distance) -sd(ablines_dutch_drive_routes$distance) - - -# Bonus (warning: very difficult): Generate an A/B Street scenario of your choice for the local authority of Hereford (or any other local authority in England and Wales) starting with this code. 
Note: this requires fast internet connection and decent computer: - -local_authority_name = "Leeds" -pct_region_name = pct::pct_regions_lookup %>% - filter(lad16nm == local_authority_name) %>% - pull(region_name) -zones_region = pct::get_pct_zones(region = pct_region_name, geography = "msoa") -table(zones_region$lad_name) -nrow(zones_region) -zones = zones_region %>% - filter(str_detect(string = lad_name, pattern = local_authority_name)) -nrow(zones) -plot(zones %>% select(bicycle:car_passenger)) -od_national = pct::get_od() -desire_lines = od::od_to_sf(od_national, z = zones) -qtm(zones, alpha = 0.3) + - tm_shape(desire_lines) + - tm_lines(lwd = "all", scale = 5) - -times = list(commute = list(hr = 8.5, sd = 0.3), town = list(hr = 11, sd = 2)) -central_hereford = tmaptools::geocode_OSM("tupsley, hereford", as.sf = TRUE) -central_hereford = tmaptools::geocode_OSM("leeds centre", as.sf = TRUE) -site_area = stplanr::geo_buffer(central_hereford, dist = 500) -study_area = sf::st_union(zones) -# buildings = osmextract::oe_get(study_area, layer = "multipolygons") -osm_polygons = osmextract::oe_get(sf::st_centroid(study_area), layer = "multipolygons") - -# # # sanity check scenario data -# class(desire_lines) -# sum(desire_lines$trimode_base) -# sum(desire_lines$walk_base, desire_lines$cycle_base, desire_lines$drive_base) -# sum(desire_lines$walk_godutch, desire_lines$cycle_godutch, desire_lines$drive_godutch) - -building_types = c( - "office", - "industrial", - "commercial", - "retail", - "warehouse", - "civic", - "public" -) -osm_buildings = osm_polygons -# osm_buildings = osm_polygons %>% -# filter(building %in% building_types) -# pct_zone = pct::pct_regions[site_area %>% sf::st_centroid(), ] -# zones = pct::get_pct_zones(pct_zone$region_name, geography = "msoa") -# zones_of_interest = zones[zones$geo_code %in% c(desire_lines$geo_code1, desire_lines$geo_code2), ] -zones_of_interest = zones - -buildings_in_zones = osm_buildings[zones_of_interest, , op = sf::st_within] - 
-# mapview::mapview(zones_of_interest) + -# mapview::mapview(buildings_in_zones) -buildings_in_zones = buildings_in_zones %>% - select(osm_way_id, building) - -n_buildings_per_zone = aggregate(buildings_in_zones, zones_of_interest, FUN = "length") -summary(n_buildings_per_zone$osm_way_id) -mbz = 10 -zones_lacking_buildings = n_buildings_per_zone$osm_way_id < mbz -zones_lacking_buildings[is.na(zones_lacking_buildings)] = TRUE -# if(any(zones_lacking_buildings)) { -# sz = rep(5, length(zones_lacking_buildings) ) # n buildings per zone - arbitrary -# new_buildings = sf::st_sample(zones_of_interest[zones_lacking_buildings, ], size = sz) -# new_buildings = sf::st_sf( -# data.frame(osm_way_id = rep(NA, length(new_buildings)), building = NA), -# geometry = stplanr::geo_buffer(new_buildings, dist = 20, nQuadSegs = 1) -# ) -# buildings_in_zones = rbind(buildings_in_zones, new_buildings) -# } - -zones_of_interest = zones %>% - filter(!zones_lacking_buildings) - -osm_polygons_cents = osm_polygons %>% sf::st_centroid() -osm_polygons_cents_in_site = osm_polygons_cents[site_area, , op = sf::st_within] -osm_polygons_in_site = osm_polygons %>% - filter(osm_id %in% osm_polygons_cents_in_site$osm_way_id) -houses = osm_polygons_in_site %>% - filter(building == "residential") %>% # todo: all non-destination buildings? 
- select(osm_way_id, building) -n_houses = nrow(houses) - -names(desire_lines) - -names(desire_lines) -# names(desire_lines)[3:13] -# names(desire_lines)[3:13] = paste0("base_", names(desire_lines)[3:13]) -desire_lines_scenario = desire_lines %>% - transmute(geo_code1, geo_code2, walk_base = foot, cycle_base = bicycle, drive_base = car_driver) %>% - mutate(all_base = walk_base + cycle_base + drive_base) - -# visualise inputs -mapview::mapview(houses) + - mapview::mapview(zones_of_interest) + - mapview::mapview(buildings_in_zones) + - mapview::mapview(desire_lines_scenario) - - -abc = abstr::ab_scenario( - houses, - buildings = buildings_in_zones, - desire_lines = desire_lines_scenario %>% sample_n(5), - zones = zones_of_interest, - scenario = "base", - output_format = "sf" -) - -abc$departure = abstr::ab_time_normal(hr = times$commute$hr, sd = times$commute$sd, n = nrow(abc)) -abt = abstr::ab_scenario( - houses, - buildings = buildings_in_zones, - desire_lines = desire_lines %>% filter(purpose == "town"), - zones = zones_of_interest, - scenario = "base", - output_format = "sf" -) -abt$departure = abstr::ab_time_normal(hr = times$town$hr, sd = times$town$sd, n = nrow(abt)) -abb = rbind(abc, abt) -abbl = abstr::ab_sf_to_json(abb) - -abcd = abstr::ab_scenario( - houses, - buildings = buildings_in_zones, - desire_lines = desire_lines %>% filter(purpose == "commute"), - zones = zones_of_interest, - scenario = "dutch", - output_format = "sf" -) -abcd$departure = abstr::ab_time_normal(hr = times$commute$hr, sd = times$commute$sd, n = nrow(abc)) -abtd = abstr::ab_scenario( - houses, - buildings = buildings_in_zones, - desire_lines = desire_lines %>% filter(purpose == "town"), - zones = zones_of_interest, - scenario = "dutch", - output_format = "sf" -) -abtd$departure = abstr::ab_time_normal(hr = times$town$hr, sd = times$town$sd, n = nrow(abtd)) -abbd = rbind(abcd, abtd) -hist(abbd$departure, breaks = seq(0, 60*60*24, 60 * 15)) -abbld = abstr::ab_sf_to_json(abbd, 
mode_column = "mode_dutch") - diff --git a/messages/2021/seminar1-workshop.Rmd b/messages/2021/seminar1-workshop.Rmd deleted file mode 100644 index 7106631..0000000 --- a/messages/2021/seminar1-workshop.Rmd +++ /dev/null @@ -1,164 +0,0 @@ ---- -output: github_document ---- - -# Seminar 1: Data driven and gamified transport planning with A/B Street - -14:00-14:10: Introduction to the session (Robin) -14:10-14:40: A/B Street Lecture (Dustin - video) -14:40-14:55: Q&A - -14:55-15:00 Break - -15:00-16:00 Exercises -16:00-16:30 Discussion of coursework ideas in breakout rooms - - -Exercises, in ascending order of difficulty: - -## Getting started (everyone) - 15 min - - - - -- Download A/B Street and get it running: https://a-b-street.github.io/docs/howto/index.html -- Open sandbox mode, then change the map at the top -- Choose to download more cities, and pick **gb/leeds** -- Open up North Leeds -- Find the University of Leeds ITS building (using your own knowledge, the search tool to the bottom right, or by following the GIF below) - -![Imgur](https://i.imgur.com/5I7NeGm.gif) - -- Change the scenario at the top from "none" to "trips between home and work" -- Run the simulation until about 6:30 (using the speed controls at the bottom) - -Note: If you can't install A/B Street on your computer, ask for help. As a last resort, you can run it in your browser, but this will be slower: abstreet.s3-website.us-east-2.amazonaws.com/dev/game/?--dev&gb/leeds/maps/north.bin - -**Next you will choose one from the following three options.** - -## 1 In Game exercises - demo 5 min on key features - -- Tutorial - - From the main screen, open up the tutorial and play through it to learn basic controls - - If you get stuck at one of the levels, ask for help or skip it; the point is mostly to learn the controls -- Editing lanes - - Open up the map layers (bottom right, near the minimap) and explore a bit - - Check out the bike network in Leeds -- not a very connected network, is it? 
- - Click edit map at the top, then go click on individual lanes to change their type - - Based on your personal experience in Leeds (or another map) or some guesses, where should some cycle lanes go? - - From edit mode, you can name your proposal (from "Untitled Proposal"). It's saved in a file in the **data/player/edits/gb/leeds/north_leeds** folder. We can share/discuss ideas everybody comes up with later. -- Editing traffic signals - - Around 6:30 on the North Leeds map, there's some congestion near Sheepscar St and Meanwood Rd - - Click one of the traffic signals, then edit - - Try to improve the timing or synchronize multiple intersections - - Hint: you can edit multiple intersections at the same time - - If you like fiddling with traffic signals, go to the main menu, pick challenges, and try traffic signal survivor -- Evaluating an edited map - - Go to the main screen, then click Community Proposals - - Try the Lake Washington Blvd proposal - - Click on interesting roads and intersections (especially around the proposal) and explore the time-series data - - Explore the map layers (bottom right, near the minimap) -- particularly throughput, compared to before the proposal - - Explore the "more data" dashboards in the top-right corner - - Find some individual trips that became faster or slower due to this change, and figure out why - - Does the change make fast trips much slower? Does it affect trips that were longer to begin with? -- Gridlock - - Many problems cause the vehicles in a traffic simulation to get stuck, usually unrealistically - - Go try the weekday scenario in some Seattle maps -- Phinney Ride, Ballard, South Seattle - - What are some of the problems you can find? Which might be caused by bad data? Which look like simulation bugs? - -## 2 Import a new region (intermediate) 10 min - -This exercise requires familiarity with using the command line. Go follow https://a-b-street.github.io/docs/howto/new_city.html. 
If you downloaded the .zip, then you can run the importer tool on a .osm file. You may also need to get [osmconvert](https://wiki.openstreetmap.org/wiki/Osmconvert) running. - -Your goal is to import a city you're interested in. Reach out if you need help! - -## 3 abstr (advanced) 10 min - -Take a look at the documentation at https://a-b-street.github.io/abstr/ - -Install and load the package (and the tidyverse for data manipulation and tmap for mapping) as follows: - -```{r, eval=FALSE} -remotes::install_github("a-b-street/abstr") -``` - -```{r, message=FALSE} -library(abstr) -library(tidyverse) -library(tmap) -tmap_mode("view") -``` - -Reproduce the examples in the README and visualise only the resulting driving trips at the disaggregated (building to building) level. -The results should look like this: - -```{r, echo=FALSE} -ablines_dutch = ab_scenario( - houses = leeds_houses, - buildings = leeds_buildings, - desire_lines = leeds_desire_lines, - zones = leeds_zones, - output_format = "sf" -) -ablines_dutch_drive = ablines_dutch %>% - filter(mode_base == "Drive") -tm_shape(leeds_buildings, bbox = sf::st_bbox(ablines_dutch_drive)) + tm_polygons() + - tm_shape(leeds_houses) + tm_polygons(col = "blue") + - tm_shape(ablines_dutch_drive) + tm_lines(col = "red") -``` - - -Bonus: look-up the documentation for the function `stplanr::route()` and use that in combination with the function `stplanr::route_osrm()` to calculate driving routes associated with the desire lines shown above. - - -```{r} -ablines_dutch_drive_routes = stplanr::route( - l = ablines_dutch_drive, - route_fun = stplanr::route_osrm, - osrm.profile = "car" -) -ablines_dutch_drive_routes$n = 1 -ablines_rnet = stplanr::overline(ablines_dutch_drive_routes, "n") -tm_shape(ablines_rnet) + tm_lines(lwd = "n", scale = 9) -``` - -What is the mean average and standard deviation of driving route distances in that scenario? 
- -```{r, eval=FALSE, echo=FALSE} -mean(ablines_dutch_drive_routes$distance) -sd(ablines_dutch_drive_routes$distance) -``` - - -Which of those routes could be replaced by active modes? - - -Bonus (warning: very difficult): Generate an A/B Street scenario of your choice for the local authority of Hereford (or any other local authority in England and Wales) starting with this code. Note: this requires fast internet connection and decent computer: - -```{r, message=FALSE} -local_authority_name = "Herefordshire" -pct_region_name = pct::pct_regions_lookup %>% - filter(lad16nm == local_authority_name) %>% - pull(region_name) -zones_region = pct::get_pct_zones(region = pct_region_name, geography = "msoa") -table(zones_region$lad_name) -nrow(zones_region) -zones = zones_region %>% - filter(str_detect(string = lad_name, pattern = local_authority_name)) -nrow(zones) -plot(zones %>% select(bicycle:car_passenger)) -od_national = pct::get_od() -desire_lines = od::od_to_sf(od_national, z = zones) -qtm(zones, alpha = 0.3) + - tm_shape(desire_lines) + - tm_lines(lwd = "all", scale = 5) -``` - - - -## Working on the the exercises 20 min - - - - - diff --git a/messages/2021/seminar1-workshop.md b/messages/2021/seminar1-workshop.md deleted file mode 100644 index 442d237..0000000 --- a/messages/2021/seminar1-workshop.md +++ /dev/null @@ -1,207 +0,0 @@ - -# Seminar 1: Data driven and gamified transport planning with A/B Street - -14:00-14:10: Introduction to the session (Robin) 14:10-14:40: A/B Street -Lecture (Dustin - video) 14:40-14:55: Q&A - -14:55-15:00 Break - -15:00-16:00 Exercises 16:00-16:30 Discussion of coursework ideas in -breakout rooms - -Exercises, in ascending order of difficulty: - -## Getting started (everyone) - 15 min - - - - -- Download A/B Street and get it running: - -- Open sandbox mode, then change the map at the top -- Choose to download more cities, and pick **gb/leeds** -- Open up North Leeds -- Find the University of Leeds ITS building (using your own 
knowledge, - the search tool to the bottom right, or by following the GIF below) - -![Imgur](https://i.imgur.com/5I7NeGm.gif) - -- Change the scenario at the top from “none” to “trips between home - and work” -- Run the simulation until about 6:30 (using the speed controls at the - bottom) - -Note: If you can’t install A/B Street on your computer, ask for help. As -a last resort, you can run it in your browser, but this will be slower: -abstreet.s3-website.us-east-2.amazonaws.com/dev/game/?–dev&gb/leeds/maps/north.bin - -**Next you will choose one from the following three options.** - -## 1 In Game exercises - demo 5 min on key features - -- Tutorial - - From the main screen, open up the tutorial and play through it - to learn basic controls - - If you get stuck at one of the levels, ask for help or skip it; - the point is mostly to learn the controls -- Editing lanes - - Open up the map layers (bottom right, near the minimap) and - explore a bit - - Check out the bike network in Leeds – not a very connected - network, is it? - - Click edit map at the top, then go click on individual lanes to - change their type - - Based on your personal experience in Leeds (or another map) or - some guesses, where should some cycle lanes go? - - From edit mode, you can name your proposal (from “Untitled - Proposal”). It’s saved in a file in the - **data/player/edits/gb/leeds/north\_leeds** folder. We can - share/discuss ideas everybody comes up with later. 
-- Editing traffic signals - - Around 6:30 on the North Leeds map, there’s some congestion near - Sheepscar St and Meanwood Rd - - Click one of the traffic signals, then edit - - Try to improve the timing or synchronize multiple intersections - - Hint: you can edit multiple intersections at the same time - - If you like fiddling with traffic signals, go to the main menu, - pick challenges, and try traffic signal survivor -- Evaluating an edited map - - Go to the main screen, then click Community Proposals - - Try the Lake Washington Blvd proposal - - Click on interesting roads and intersections (especially around - the proposal) and explore the time-series data - - Explore the map layers (bottom right, near the minimap) – - particularly throughput, compared to before the proposal - - Explore the “more data” dashboards in the top-right corner - - Find some individual trips that became faster or slower due to - this change, and figure out why - - Does the change make fast trips much slower? Does it affect - trips that were longer to begin with? -- Gridlock - - Many problems cause the vehicles in a traffic simulation to get - stuck, usually unrealistically - - Go try the weekday scenario in some Seattle maps – Phinney Ride, - Ballard, South Seattle - - What are some of the problems you can find? Which might be - caused by bad data? Which look like simulation bugs? - -## 2 Import a new region (intermediate) 10 min - -This exercise requires familiarity with using the command line. Go -follow . If you -downloaded the .zip, then you can run the importer tool on a .osm file. -You may also need to get -[osmconvert](https://wiki.openstreetmap.org/wiki/Osmconvert) running. - -Your goal is to import a city you’re interested in. Reach out if you -need help! 
- -## 3 abstr (advanced) 10 min - -Take a look at the documentation at - - -Install and load the package (and the tidyverse for data manipulation -and tmap for mapping) as follows: - -``` r -remotes::install_github("a-b-street/abstr") -``` - -``` r -library(abstr) -library(tidyverse) -library(tmap) -tmap_mode("view") -``` - -Reproduce the examples in the README and visualise only the resulting -driving trips at the disaggregated (building to building) level. The -results should look like this: - - ## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 7.0.0 - -![](seminar1-workshop_files/figure-gfm/unnamed-chunk-3-1.png) - -Bonus: look-up the documentation for the function `stplanr::route()` and -use that in combination with the function `stplanr::route_osrm()` to -calculate driving routes associated with the desire lines shown above. - -``` r -ablines_dutch_drive_routes = stplanr::route( - l = ablines_dutch_drive, - route_fun = stplanr::route_osrm, - osrm.profile = "car" -) -``` - - ## Most common output is sf - -``` r -ablines_dutch_drive_routes$n = 1 -ablines_rnet = stplanr::overline(ablines_dutch_drive_routes, "n") -tm_shape(ablines_rnet) + tm_lines(lwd = "n", scale = 9) -``` - - ## Legend for line widths not available in view mode. - -![](seminar1-workshop_files/figure-gfm/unnamed-chunk-4-1.png) - -What is the mean average and standard deviation of driving route -distances in that scenario? - -Which of those routes could be replaced by active modes? - -Bonus (warning: very difficult): Generate an A/B Street scenario of your -choice for the local authority of Hereford (or any other local authority -in England and Wales) starting with this code. 
Note: this requires fast -internet connection and decent computer: - -``` r -local_authority_name = "Herefordshire" -pct_region_name = pct::pct_regions_lookup %>% - filter(lad16nm == local_authority_name) %>% - pull(region_name) -zones_region = pct::get_pct_zones(region = pct_region_name, geography = "msoa") -table(zones_region$lad_name) -``` - - ## - ## Bromsgrove Herefordshire, County of Malvern Hills - ## 14 23 11 - ## Redditch Worcester Wychavon - ## 13 14 19 - ## Wyre Forest - ## 14 - -``` r -nrow(zones_region) -``` - - ## [1] 108 - -``` r -zones = zones_region %>% - filter(str_detect(string = lad_name, pattern = local_authority_name)) -nrow(zones) -``` - - ## [1] 23 - -``` r -plot(zones %>% select(bicycle:car_passenger)) -``` - -![](seminar1-workshop_files/figure-gfm/unnamed-chunk-6-1.png) - -``` r -od_national = pct::get_od() -desire_lines = od::od_to_sf(od_national, z = zones) -qtm(zones, alpha = 0.3) + - tm_shape(desire_lines) + - tm_lines(lwd = "all", scale = 5) -``` - -![](seminar1-workshop_files/figure-gfm/unnamed-chunk-6-2.png) - -## Working on the the exercises 20 min diff --git a/messages/2021/seminar2-workshop.Rmd b/messages/2021/seminar2-workshop.Rmd deleted file mode 100644 index 74571ff..0000000 --- a/messages/2021/seminar2-workshop.Rmd +++ /dev/null @@ -1,27 +0,0 @@ ---- -output: github_document ---- - - -# Seminar 2: Geographic data in the transport planning industry - -This seminar will be in 2 parts - -## Part 1: seminar - -- 10 min introducing the seminar - Robin -- 30 min talk on use of data science methods in Remix - Peter -- 20 min questions -- 5 min break - -## Part 2: coursework - -- Talk over updated dissertation guidance, available here: https://github.com/ITSLeeds/TDS/blob/master/coursework-template.md#coursework-guidance ~10 min - -- Questions from the group about the coursework ~ 5 m - -- Breakout rooms - each person will have ~3 minutes to present their ideas to the group and get feedback from the chair - ~45 min - -- Discussion 
of break out rooms ~15 min - -- Live demo of using RMarkdown to write a coursework topic and technical questions - see https://bookdown.org/yihui/rmarkdown-cookbook/ - 15 min \ No newline at end of file diff --git a/messages/2021/seminar2-workshop.md b/messages/2021/seminar2-workshop.md deleted file mode 100644 index 34b397d..0000000 --- a/messages/2021/seminar2-workshop.md +++ /dev/null @@ -1,28 +0,0 @@ - -# Seminar 2: Geographic data in the transport planning industry - -This seminar will be in 2 parts - -## Part 1: seminar - -- 10 min introducing the seminar - Robin -- 30 min talk on use of data science methods in Remix - Peter -- 20 min questions -- 5 min break - -## Part 2: coursework - -- Talk over updated dissertation guidance, available here: - - \~10 min - -- Questions from the group about the coursework \~ 5 m - -- Breakout rooms - each person will have \~3 minutes to present their - ideas to the group and get feedback from the chair - \~45 min - -- Discussion of break out rooms \~15 min - -- Live demo of using RMarkdown to write a coursework topic and - technical questions - see - - 15 min diff --git a/messages/2021/welcome.Rmd b/messages/2021/welcome.Rmd deleted file mode 100644 index 45beaba..0000000 --- a/messages/2021/welcome.Rmd +++ /dev/null @@ -1,89 +0,0 @@ ---- -bibliography: ../tds.bib -output: github_document ---- - - - -```{r, include=FALSE} -library(tidyverse) -``` - - -Dear Students and aspiring Data Scientists, - -## Welcome - -Welcome to Transport Data Science. -In this module you will learn data science skills and how to apply them to solve real world problems, with a focus on transport planning. -Transport is by definition a geographic phenomenon, meaning the movement from one place to another: -"the purpose of transportation is to overcome space" [@rodrigue_geography_2013]. - -Becoming good at transport data science therefore means becoming good at data science in general and geographic data science in particular. 
-2021 is an exciting time to be learning data science for transport applications. -A wide range of new and open source tools, combined with unprecedented amounts of new data, enable researchers to visualise/model/understand transport systems in more breadth and depth than ever before [@lovelace_open_2021]. - -## About the course - -Based on an understanding of the evolving nature of data science as a collaborative and open source code-driven enterprise, this module takes a 'learning by doing' approach. -The contact time will consist of - -- 5 * one hour lectures that will be released as videos at the beginning of each week (starting from next week) -- 5 * 2.5 hour practicals in which we will work through data science problems and solutions together -- 2 * 2.5 hour seminars consisting of an hour of an external speaker talking about transport data science used in the wild -- most importantly, you will dedicate substantial time (several hours per week) to practicing data science, resulting in a reproducible project portfolio in which you will demonstrate your data science skills - -The module timetable is shown in the table below. - -```{r, message=FALSE, echo=FALSE} -timetable = read_csv("../timetable-2020.csv") -timetable %>% - mutate(Duration = DTEND - DTSTART) %>% - mutate(Duration = case_when( - str_detect(SUMMARY, "deadline 1") ~ Duration + 59 + 60 * 3, - Duration == min(Duration) ~ Duration + 59, - TRUE ~ Duration)) %>% - # mutate(Duration = lubridate::hour(Duration)) %>% - select(-UID, -DTEND) %>% - rename_with(str_to_title) %>% - knitr::kable() -``` - - -You will need to work hard to become a proficient data scientist in the transport domain. -The work presented in the table above will provide you with the foundation you need to progress. -But you will need to bring a vital ingredient to the course: passion. 
- -## Homework - -The first thing you should do is ensure that you have the timetable stored safely in your calendar, so you do not miss important practicals or seminars. -You can watch the lectures in your own time but you *must* ensure you have watched each one and taken notes before each practical session on Thursdays. - -The second thing you should do is ensure that you have the necessary software installed on your computer and that you have tested that you can use it for the datasets we will be using in the course. -**You must read Chapters 1 to 4 of the book Reproducible Road Safety Research with R [@lovelace_reproducible_2020].** -Most important section of those 4 chapters is Section 1.5, which explains how to install R and RStudio. -This is not a theoretical section: I suggest you follow the instructions here ASAP: https://itsleeds.github.io/rrsrr/introduction.html#installing-r-and-rstudio - -Other key chapters in that resources are: - -- Chapter 2 on the basics of the statistical programming language R: https://itsleeds.github.io/rrsrr/basics.html -- Chapter 3 on using RStudio: https://itsleeds.github.io/rrsrr/rstudio.html -- Chapter 4 on installing R packages, **make sure you can install packages such as sf**: https://itsleeds.github.io/rrsrr/pkgs.html - -The third and final thing to do is to *engage* with the reading content and platforms associated with this module. -See the reading list associated with the module at https://github.com/ITSLeeds/TDS/blob/master/catalogue.md#reading-list. -Read-up on the references and links provided in the References section below. -Check out the course's online home on the GitHub platform at https://github.com/ITSLeeds/TDS. -(Fun fact, you can find the source code underlying this message [here](https://github.com/ITSLeeds/TDS/blob/master/messages/welcome.Rmd).) 
-To collaborate on GitHub you should sign-up to get a GitHub account here: https://github.com/ -After you have a GitHub account you can ask questions by opening an 'Issue' in the project's repository, a common way for professional data scientists to communicate about code and their work: https://github.com/ITSLeeds/TDS/issues/ -And ensure you can access the Transport Data Science Team on Microsoft Teams and please do say hello, as shown below. - -![](https://user-images.githubusercontent.com/1825120/106159315-8f1fd880-617c-11eb-91be-1a6a123082b2.png) - -And please do ask any questions do ask, either via email or preferably on Teams or on a GitHub issue. It’s always better ask than to ‘suffer in silence!’ - -I really look forward to working with you all to make this a successful and enjoyable module. - -# References - diff --git a/messages/2021/welcome.md b/messages/2021/welcome.md deleted file mode 100644 index 261b1b1..0000000 --- a/messages/2021/welcome.md +++ /dev/null @@ -1,144 +0,0 @@ - - - -Dear Students and aspiring Data Scientists, - -## Welcome - -Welcome to Transport Data Science. In this module you will learn data -science skills and how to apply them to solve real world problems, with -a focus on transport planning. Transport is by definition a geographic -phenomenon, meaning the movement from one place to another: “the purpose -of transportation is to overcome space” (Rodrigue, Comtois, and Slack -2013). - -Becoming good at transport data science therefore means becoming good at -data science in general and geographic data science in particular. 2021 -is an exciting time to be learning data science for transport -applications. A wide range of new and open source tools, combined with -unprecedented amounts of new data, enable researchers to -visualise/model/understand transport systems in more breadth and depth -than ever before (Lovelace 2021). 
- -## About the course - -Based on an understanding of the evolving nature of data science as a -collaborative and open source code-driven enterprise, this module takes -a ‘learning by doing’ approach. The contact time will consist of - -- 5 \* one hour lectures that will be released as videos at the - beginning of each week (starting from next week) -- 5 \* 2.5 hour practicals in which we will work through data science - problems and solutions together -- 2 \* 2.5 hour seminars consisting of an hour of an external speaker - talking about transport data science used in the wild -- most importantly, you will dedicate substantial time (several hours - per week) to practicing data science, resulting in a reproducible - project portfolio in which you will demonstrate your data science - skills - -The module timetable is shown in the table below. - -| Summary | Description | Dtstart | Location | Duration | -|:---------------------------|:--------------------------------------------------------------------|:--------------------|:---------------|:---------| -| TDS deadline 1 | Computer set-up | 2021-01-29 13:00:00 | Online - Teams | 240 mins | -| TDS Lecture 1: intro | Introduction to transport data science in Online - Teams | 2021-02-01 09:00:00 | Online - Teams | 60 mins | -| TDS Practical 1: structure | The structure of transport data in Online - Teams | 2021-02-04 14:00:00 | Online - Teams | 150 mins | -| TDS Lecture 2: structure | The structure of transport data and data cleaning in Online - Teams | 2021-02-08 09:00:00 | Online - Teams | 60 mins | -| TDS Practical 2: getting | Getting transport data in Online - Teams | 2021-02-11 14:00:00 | Online - Teams | 150 mins | -| TDS Lecture 3: routing | Routing in Online - Teams | 2021-02-15 09:00:00 | Online - Teams | 60 mins | -| TDS seminar 1 | Mapping large datasets | 2021-02-18 14:00:00 | Online - Teams | 150 mins | -| TDS deadline 2 | Practical: visualising transport data | 2021-02-19 13:00:00 | Online - Teams | 150 
mins | -| TDS Practical 3: routing | Routing in Online - Teams | 2021-02-25 14:00:00 | Online - Teams | 150 mins | -| TDS seminar 2 | Data science in transport planning | 2021-03-04 14:00:00 | Online - Teams | 150 mins | -| TDS Lecture 4: viz | Visualisation in Online - Teams | 2021-03-15 09:00:00 | Online - Teams | 60 mins | -| TDS Practical 4: modelling | Modelling in Online - Teams | 2021-03-18 14:00:00 | Online - Teams | 150 mins | -| TDS Lecture 5: project | Project work in Online - Teams | 2021-03-22 09:00:00 | Online - Teams | 60 mins | -| TDS deadline 3 | Draft portfolio | 2021-03-26 13:00:00 | Online - Teams | 60 mins | -| TDS Practical 5: project | Project work in Online - Teams | 2021-04-29 14:00:00 | Online - Teams | 150 mins | -| TDS deadline 4 | Deadline: coursework, 2pm | 2021-05-14 13:00:00 | Online - Teams | 60 mins | - -You will need to work hard to become a proficient data scientist in the -transport domain. The work presented in the table above will provide you -with the foundation you need to progress. But you will need to bring a -vital ingredient to the course: passion. - -## Homework - -The first thing you should do is ensure that you have the timetable -stored safely in your calendar, so you do not miss important practicals -or seminars. You can watch the lectures in your own time but you *must* -ensure you have watched each one and taken notes before each practical -session on Thursdays. - -The second thing you should do is ensure that you have the necessary -software installed on your computer and that you have tested that you -can use it for the datasets we will be using in the course. **You must -read Chapters 1 to 4 of the book Reproducible Road Safety Research with -R (Lovelace 2020).** Most important section of those 4 chapters is -Section 1.5, which explains how to install R and RStudio. 
This is not a -theoretical section: I suggest you follow the instructions here ASAP: - - -Other key chapters in that resources are: - -- Chapter 2 on the basics of the statistical programming language R: - -- Chapter 3 on using RStudio: - -- Chapter 4 on installing R packages, **make sure you can install - packages such as sf**: - -The third and final thing to do is to *engage* with the reading content -and platforms associated with this module. See the reading list -associated with the module at -. -Read-up on the references and links provided in the References section -below. Check out the course’s online home on the GitHub platform at -. (Fun fact, you can find the source -code underlying this message -[here](https://github.com/ITSLeeds/TDS/blob/master/messages/welcome.Rmd).) -To collaborate on GitHub you should sign-up to get a GitHub account -here: After you have a GitHub account you can ask -questions by opening an ‘Issue’ in the project’s repository, a common -way for professional data scientists to communicate about code and their -work: And ensure you can -access the Transport Data Science Team on Microsoft Teams and please do -say hello, as shown below. - -![](https://user-images.githubusercontent.com/1825120/106159315-8f1fd880-617c-11eb-91be-1a6a123082b2.png) - -And please do ask any questions do ask, either via email or preferably -on Teams or on a GitHub issue. It’s always better ask than to ‘suffer in -silence!’ - -I really look forward to working with you all to make this a successful -and enjoyable module. - -# References - -
- -
- -Lovelace, Robin. 2020. “Reproducible Road Safety Research with R.” RAC -Foundation. - -
- -
- -———. 2021. “Open Source Tools for Geographic Analysis in Transport -Planning.” *Journal of Geographical Systems*, January. -. - -
- -
- -Rodrigue, Jean-Paul, Claude Comtois, and Brian Slack. 2013. *The -Geography of Transport Systems*. Third. London, New York: Routledge. - -
- -
diff --git a/messages/2022/example.Rmd b/messages/2022/example.Rmd deleted file mode 100644 index 205f53e..0000000 --- a/messages/2022/example.Rmd +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: "demo rmarkdown" -output: html_document ---- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) -``` - -# This is a heading - -## Subheading - -```{r} -library(tidyverse) -library(stplanr) -od_data = od_data_sample - -names(od_data) -# Ctl + Shift + M -od_data_walk = od_data %>% - rename(walk = foot) %>% - filter(walk > 0) %>% - select(geo_code1, geo_code2, all, car_driver, walk, bicycle) %>% - mutate( - proportion_walk = walk / all, - proportion_drive = car_driver / all - ) -class(od_data) - -# calculate the % of OD have at least 1 person walking? -nrow(od_data_walk) / nrow(od_data) * 100 -od_data_walk_cycle = od_data_walk %>% - mutate(pcycle = bicycle / all) - -# Exercise 3 -cor(od_data_walk_cycle$proportion_walk, od_data_walk_cycle$pcycle) -plot(od_data_walk_cycle$proportion_walk, od_data_walk_cycle$pcycle) -od_data_walk_cycle %>% - ggplot() + - geom_point(aes(proportion_walk, pcycle)) + - scale_y_continuous(labels = scales::percent) + - scale_x_continuous(labels = scales::percent) -``` - diff --git a/messages/2022/practical1-notes.R b/messages/2022/practical1-notes.R deleted file mode 100644 index c9318e1..0000000 --- a/messages/2022/practical1-notes.R +++ /dev/null @@ -1,57 +0,0 @@ -install.packages("remotes") -pkgs = c( - "nycflights13",# data package - "pct", # package for getting travel data in the UK - "sf", # spatial data package - "stats19", # downloads and formats open stats19 crash data - "stplanr", # for working with origin-destination and route data - "tidyverse", # a package for user friendly data science - "tmap" # for making maps -) -remotes::install_cran(pkgs) -remotes::install_github("nowosad/spDataLarge") - -install.packages("terra", type = "binary") -?install_cran -od_data = od_data_sample - -names(od_data) -# Ctl + Shift + M -od_data_walk 
= od_data %>% - rename(walk = foot) %>% - filter(walk > 0) %>% - select(geo_code1, geo_code2, all, car_driver, walk, bicycle) %>% - mutate( - proportion_walk = walk / all, - proportion_drive = car_driver / all - ) -class(od_data) - -# calculate the % of OD have at least 1 person walking? -nrow(od_data_walk) / nrow(od_data) * 100 -od_data_walk_cycle = od_data_walk %>% - mutate(pcycle = bicycle / all) - -# Exercise 3 -cor(od_data_walk_cycle$proportion_walk, od_data_walk_cycle$pcycle) -plot(od_data_walk_cycle$proportion_walk, od_data_walk_cycle$pcycle) -od_data_walk_cycle %>% - ggplot() + - geom_point(aes(proportion_walk, pcycle)) + - scale_y_continuous(labels = scales::percent) + - scale_x_continuous(labels = scales::percent) - -?od_data_sample -zones_sf -summary(zones_sf$geo_code %in% od_data$geo_code1) - -od_data_sf = od::od_to_sf(x = od_data, z = zones_sf) -library(tmap) -tmap_mode("view") -tm_shape(zones_sf, alpha = 0.2) + - tm_polygons() + - tm_shape(od_data_sf) + - tm_lines() -?tm_basemap - - diff --git a/messages/2022/practical1.Rmd b/messages/2022/practical1.Rmd deleted file mode 100644 index 258230c..0000000 --- a/messages/2022/practical1.Rmd +++ /dev/null @@ -1,27 +0,0 @@ ---- -bibliography: ../tds.bib -output: github_document ---- - - - -```{r, include=FALSE} -library(tidyverse) -knitr::opts_chunk$set(warning = FALSE, message = FALSE) -``` - -# Session plan for Practical 1 - -- In person event in West Teaching Lab Cluster (B.16), in the Fine Art Building: https://students.leeds.ac.uk/rooms#building=Fine-Art-Building -- Work through the [practical1](https://github.com/ITSLeeds/TDS/blob/master/practicals/1-intro.md) and [practical2](https://github.com/ITSLeeds/TDS/blob/master/practicals/2-software.md) - -## Agenda {-} - - - -1. Set-up, thinking about transport data science and homework - 30 min -1. 
Project set-up and using RStudio (this and subsequent points are based on [practical2](https://github.com/ITSLeeds/TDS/blob/master/practicals/2-software.md)) - 30 minutes -1. Getting started with transport data in the stplanr package - 30 minutes -1. Working alone through the questions on processing OD data - 1 hr - -1. Bonus: Work through [Chapter 5](https://r4ds.had.co.nz/transform.html#filter-rows-with-filter) of R for Data Science \ No newline at end of file diff --git a/messages/2022/practical1.md b/messages/2022/practical1.md deleted file mode 100644 index 6e7b6cb..0000000 --- a/messages/2022/practical1.md +++ /dev/null @@ -1,30 +0,0 @@ - - - -# Session plan for Practical 1 - -- In person event in West Teaching Lab Cluster (B.16), in the Fine Art - Building: - -- Work through the - [practical1](https://github.com/ITSLeeds/TDS/blob/master/practicals/1-intro.md) - and - [practical2](https://github.com/ITSLeeds/TDS/blob/master/practicals/2-software.md) - -## Agenda - - - - -1. Set-up, thinking about transport data science and homework - 30 min -2. Project set-up and using RStudio (this and subsequent points are - based on - [practical2](https://github.com/ITSLeeds/TDS/blob/master/practicals/2-software.md)) - - 30 minutes -3. Getting started with transport data in the stplanr package - 30 - minutes -4. Working alone through the questions on processing OD data - 1 hr - -5. 
Bonus: Work through [Chapter - 5](https://r4ds.had.co.nz/transform.html#filter-rows-with-filter) of - R for Data Science diff --git a/messages/2022/practical2-test-code.R b/messages/2022/practical2-test-code.R deleted file mode 100644 index 75cce81..0000000 --- a/messages/2022/practical2-test-code.R +++ /dev/null @@ -1,37 +0,0 @@ -packageVersion("stplanr") -pkgs = c("dodgr", "dplyr", "knitr", "opentripplanner", "osmextract", - "remotes", "sf", "stplanr", "tidyverse", "tmap") -remotes::install_cran(pkgs) # install the stplanr package if not up-to-date - -library(sf) # Spatial data functions -library(tidyverse) # General data manipulation -library(stplanr) # General transport data functions -library(dodgr) # Local routing and network analysis -library(opentripplanner) # Connect to and use OpenTripPlanner -library(tmap) # Make maps -library(osmextract) # Download and import OpenStreetMap data -tmap_mode("plot") - -## ---- eval=TRUE, message=FALSE, warning=FALSE-------------------------------------------------------- -# ip = "localhost" # to run it on your computer (see final bonus exercise) -ip = "otp.saferactive.org" # an actual server -otpcon = otp_connect(hostname = ip, - port = 80, - router = "west-yorkshire") - -u = "https://github.com/ITSLeeds/TDS/releases/download/22/NTEM_flow.geojson" -desire_lines = read_sf(u) -head(desire_lines) - -u = "https://github.com/ITSLeeds/TDS/releases/download/22/NTEM_cents.geojson" -centroids = read_sf(u) -head(centroids) - - -tm_shape(desire_lines) + - tm_lines(lwd = "all", col = "all", scale = 4, palette = "viridis") - - -tm_shape(desire_lines) + - tm_lines(lwd = "rail", col = "rail", scale = 4, palette = "viridis", style = "jenks") - diff --git a/messages/2022/practical2.Rmd b/messages/2022/practical2.Rmd deleted file mode 100644 index 33fd5ca..0000000 --- a/messages/2022/practical2.Rmd +++ /dev/null @@ -1,15 +0,0 @@ ---- -output: github_document ---- - -In preparation for the second practical: - -- Consolidate your knowledge of 
transport (origin-destination) data from the first practical session ([see here for notes](https://github.com/ITSLeeds/TDS/blob/master/messages/practical1-notes.R)) and work through the rest of the [content for the first practical](https://github.com/ITSLeeds/TDS/blob/master/practicals/2-software.md) (sections [3](https://github.com/ITSLeeds/TDS/blob/master/practicals/2-software.md#3-processing-origin-destination-data-in-bristol) and [4](https://github.com/ITSLeeds/TDS/blob/master/practicals/2-software.md#4-processing-medium-sized-data-and-basic-visualisation) ) -- Complete the homework listed in [practical 2](https://github.com/ITSLeeds/TDS/blob/master/practicals/2-software.md) including creating an RMarkdown document -- Run this reproducible R script on your computer and let us know if you have any issues: https://github.com/ITSLeeds/TDS/blob/master/messages/practical2-test-code.R -- Watch the routing lecture (see Teams for link) -- Take a look at the [practical here](https://github.com/ITSLeeds/TDS/blob/master/practicals/6-routing.md), we will run through this during the practical session - -- Bonus 1: Test out GitHub's issue tracker by responding by saying Hi and your name here: https://github.com/ITSLeeds/TDS/issues/82 -- Bonus 2: Test out Pull Requests on GitHub by adding data for you here (see the first lecture and ask on Teams for more info on this): https://github.com/ITSLeeds/TDS/edit/master/messages/locations.csv - diff --git a/messages/2022/practical2.md b/messages/2022/practical2.md deleted file mode 100644 index 7a0c250..0000000 --- a/messages/2022/practical2.md +++ /dev/null @@ -1,34 +0,0 @@ - -In preparation for the second practical: - -- Consolidate your knowledge of transport (origin-destination) data - from the first practical session ([see here for - notes](https://github.com/ITSLeeds/TDS/blob/master/messages/practical1-notes.R)) - and work through the rest of the [content for the first - 
practical](https://github.com/ITSLeeds/TDS/blob/master/practicals/2-software.md) - (sections - [3](https://github.com/ITSLeeds/TDS/blob/master/practicals/2-software.md#3-processing-origin-destination-data-in-bristol) - and - [4](https://github.com/ITSLeeds/TDS/blob/master/practicals/2-software.md#4-processing-medium-sized-data-and-basic-visualisation) - ) - -- Complete the homework listed in [practical - 2](https://github.com/ITSLeeds/TDS/blob/master/practicals/2-software.md) - including creating an RMarkdown document - -- Run this reproducible R script on your computer and let us know if - you have any issues: - - -- Watch the routing lecture (see Teams for link) - -- Take a look at the [practical - here](https://github.com/ITSLeeds/TDS/blob/master/practicals/6-routing.md), - we will run through this during the practical session - -- Bonus 1: Test out GitHub’s issue tracker by responding by saying Hi - and your name here: - -- Bonus 2: Test out Pull Requests on GitHub by adding data for you - here (see the first lecture and ask on Teams for more info on this): - diff --git a/messages/2022/practical3-plan-and-homework.Rmd b/messages/2022/practical3-plan-and-homework.Rmd deleted file mode 100644 index 71bd896..0000000 --- a/messages/2022/practical3-plan-and-homework.Rmd +++ /dev/null @@ -1,33 +0,0 @@ ---- -output: github_document ---- - -- Review of homework and using RMarkdown (45 minutes) - - - -1) check you have done all aspects of the homework from last week, including creating an RMarkdown document, see [here](https://github.com/ITSLeeds/TDS/blob/master/messages/practical2.md) for details. - -2) interact on Teams and on GitHub, if you have any questions about how to do this let me know on Teams, I've just shared a link to a paper on methods for pre-processing OD datasets, any other links/ideas/questions do share, or just say hi there (see link here). 
- -3) start thinking about project ideas and the kinds of datasets you would like to work with for the submission - remember the deadline for submitting a 2 page document outlining your idea is in just over a month (18th March) so it's a good idea to get thinking about your projects already - - - -- How to use RMarkdown for courswork, see the template here: https://github.com/ITSLeeds/TDS/blob/master/coursework-template.Rmd - -- Testing out overpass - -Break - -- Getting stats19 data, working on your own (45 minutes) -- 15 min Q&A - -- Getting data with the pct package -- Getting data from the web - -- Homework: Making a start on the interim 2 page report, deadline: 18th March -- Reading in preparation for Practical 4: - - https://rmarkdown.rstudio.com/ - - Paper on modelling road crash data: https://arxiv.org/abs/2011.12595 - - This article on OD data: https://itsleeds.github.io/od/articles/od.html diff --git a/messages/2022/practical3-plan-and-homework.md b/messages/2022/practical3-plan-and-homework.md deleted file mode 100644 index f77c10e..0000000 --- a/messages/2022/practical3-plan-and-homework.md +++ /dev/null @@ -1,49 +0,0 @@ - -- Review of homework and using RMarkdown (45 minutes) - - - -1) check you have done all aspects of the homework from last week, - including creating an RMarkdown document, see - [here](https://github.com/ITSLeeds/TDS/blob/master/messages/practical2.md) - for details. - -2) interact on Teams and on GitHub, if you have any questions about how - to do this let me know on Teams, I’ve just shared a link to a paper - on methods for pre-processing OD datasets, any other - links/ideas/questions do share, or just say hi there (see link - here). 
- -3) start thinking about project ideas and the kinds of datasets you - would like to work with for the submission - remember the deadline - for submitting a 2 page document outlining your idea is in just over - a month (18th March) so it’s a good idea to get thinking about your - projects already - - - -- How to use RMarkdown for courswork, see the template here: - - -- Testing out overpass - -Break - -- Getting stats19 data, working on your own (45 minutes) - -- 15 min Q&A - -- Getting data with the pct package - -- Getting data from the web - -- Homework: Making a start on the interim 2 page report, deadline: - 18th March - -- Reading in preparation for Practical 4: - - - - - Paper on modelling road crash data: - - - This article on OD data: - diff --git a/messages/2022/practical4-live-demo.R b/messages/2022/practical4-live-demo.R deleted file mode 100644 index 00dddfa..0000000 --- a/messages/2022/practical4-live-demo.R +++ /dev/null @@ -1,66 +0,0 @@ -library(tidyverse) -library(tmap) -library(sf) - -zones = pct::get_pct_zones(region = "west-yorkshire", geography = "msoa") -zones_lsoa = pct::get_pct_zones("west-yorkshire") -nrow(zones_lsoa) -nrow(zones) - -u = "https://github.com/ITSLeeds/TDS/releases/download/0.1/desire_lines.geojson" -download.file(u, "desire_lines.geojson") -desire_lines = read_sf("desire_lines.geojson") - -ncol(zones) - -zones_clean = zones %>% - select(geo_code, foot) - -ncol(zones_clean) - -zones_active_modes = zones %>% - mutate(active = bicycle + foot) -# tmaptools::palette_explorer() -tm_shape(zones_active_modes) + - tm_fill(c("car_driver", "active"), palette = "viridis") -ggplot(zones_active_modes) + - geom_point(aes(car_driver, active)) - -zones_active_percent = zones_active_modes %>% - transmute( - geo_code = geo_code, - all = all, - proportion_car_driver = car_driver / all, - proportion_active = active / all - ) - -ggplot(zones_active_percent) + - geom_point(aes(proportion_car_driver, proportion_active, size = all), alpha = 0.5) + 
- scale_y_continuous(labels = scales::percent) + - scale_x_continuous(labels = scales::percent) + - ylab("Percent of trips by active modes") + - xlab("Percent of trips by driving") - -# Get traffic data -------------------------------------------------------- - -u = "https://storage.googleapis.com/dft-statistics/road-traffic/downloads/data-gov-uk/region_traffic_by_vehicle_type.csv" -regional_traffic_data = readr::read_csv(u) -traffic_clean = regional_traffic_data %>% - mutate(region_id = as.character(region_id)) %>% - group_by(year, region_id) %>% - summarise( - cycling = sum(pedal_cycles), - driving = sum(all_motor_vehicles) - ) %>% - pivot_longer(cols = matches("ing")) - -traffic_clean %>% - ggplot(aes(year, value, colour = region_id)) + - geom_line() + - facet_wrap(~ name, scales = "free") - -u = "https://storage.googleapis.com/dft-statistics/road-traffic/downloads/data-gov-uk/dft_traffic_counts_raw_counts.zip" -raw_data = readr::read_csv(u) -f = basename(u) -download.file(u, destfile = f) -raw_data = read_csv(f) diff --git a/messages/2022/practical4.Rmd b/messages/2022/practical4.Rmd deleted file mode 100644 index e886317..0000000 --- a/messages/2022/practical4.Rmd +++ /dev/null @@ -1,36 +0,0 @@ ---- -bibliography: ../tds.bib -output: github_document ---- - - - -```{r, include=FALSE} -library(tidyverse) -knitr::opts_chunk$set(warning = FALSE, message = FALSE) -``` - -# Session plan for Practical 4 - -This is will be the 4th and penultimate practical session in the TDS module. -The final practical session on coursework will be **in 2 months time, on 28th April**. 
- -Session content: https://github.com/ITSLeeds/TDS/blob/master/practicals/od-route-analysis.md - -The session will be structured as follows: - -- Analysis of zone data and the tidyverse (1 hour) - -- Analysis of OD level data (1 hour) - -- Brainstorming ideas for the coursework (1 hour) - - - Read through topic ideas here: https://github.com/ITSLeeds/TDS/blob/master/coursework-template.md#031-topics - -Which are your favourite, and why? Which alternative topics could you think of? - - - Make a start on your 2 page project plan: deadline for submission is **in less than 1 month, 18th March** - - - Take a look at project ideas in the coursework template document and think about a topic you would like to work on: https://github.com/ITSLeeds/TDS/blob/master/coursework-template.md#031-topics - - - Which of the datasets presented [here](https://github.com/ITSLeeds/TDS/blob/master/coursework-template.md#032-datasets) are of most interest to you? Are there any other datasets you would like to explore for this session? diff --git a/messages/2022/practical4.md b/messages/2022/practical4.md deleted file mode 100644 index 2d6736e..0000000 --- a/messages/2022/practical4.md +++ /dev/null @@ -1,37 +0,0 @@ - - - -# Session plan for Practical 4 - -This is will be the 4th and penultimate practical session in the TDS -module. The final practical session on coursework will be **in 2 months -time, on 28th April**. - -Session content: - - -The session will be structured as follows: - -- Analysis of zone data and the tidyverse (1 hour) - -- Analysis of OD level data (1 hour) - -- Brainstorming ideas for the coursework (1 hour) - - - Read through topic ideas here: - - -Which are your favourite, and why? Which alternative topics could you -think of? 
- -- Make a start on your 2 page project plan: deadline for submission is - **in less than 1 month, 18th March** - -- Take a look at project ideas in the coursework template document and - think about a topic you would like to work on: - - -- Which of the datasets presented - [here](https://github.com/ITSLeeds/TDS/blob/master/coursework-template.md#032-datasets) - are of most interest to you? Are there any other datasets you would - like to explore for this session? diff --git a/messages/2022/seminar-notes.R b/messages/2022/seminar-notes.R deleted file mode 100644 index ea202fa..0000000 --- a/messages/2022/seminar-notes.R +++ /dev/null @@ -1 +0,0 @@ -# Notes for seminar diff --git a/messages/2022/seminar1-workshop.Rmd b/messages/2022/seminar1-workshop.Rmd deleted file mode 100644 index 7a13adc..0000000 --- a/messages/2022/seminar1-workshop.Rmd +++ /dev/null @@ -1,179 +0,0 @@ ---- -output: github_document ---- - -# Seminar 1: From OD data to agent based modelling for car free futures - - -13:00-13:10: Introduction to the session (Robin) - -13:10-13:40: A/B Street Lecture (Dustin - video) - -13:40-13:55: Q&A - -13:55-14:00 Break - -14:00-15:00 Exercises - -15:00-15:30 Discussion of coursework ideas in breakout rooms - - -Exercises, in ascending order of difficulty: - -## Getting started (everyone) - 15 min - - - - -- Download A/B Street and get it running: https://a-b-street.github.io/docs/howto/index.html -- Open sandbox mode, then change the map at the top -- Choose to download more cities, and pick **gb/leeds** -- Open up North Leeds -- Find the University of Leeds ITS building (using your own knowledge, the search tool to the bottom right, or by following the GIF below) - -![Imgur](https://i.imgur.com/5I7NeGm.gif) - -- Change the scenario at the top from "none" to "trips between home and work" -- Run the simulation until about 6:30 (using the speed controls at the bottom) - -Note: If you can't install A/B Street on your computer, ask for help. 
As a last resort, you can run it in your browser, but this will be slower: abstreet.s3-website.us-east-2.amazonaws.com/dev/game/?--dev&gb/leeds/maps/north.bin - -**Next you will choose one from the following three options.** - -## 1 In Game exercises - demo 5 min on key features - -- Tutorial - - From the main screen, open up the tutorial and play through it to learn basic controls - - If you get stuck at one of the levels, ask for help or skip it; the point is mostly to learn the controls -- Editing lanes - - Open up the map layers (bottom right, near the minimap) and explore a bit - - Check out the bike network in Leeds -- not a very connected network, is it? - - Click edit map at the top, then go click on individual lanes to change their type - - Based on your personal experience in Leeds (or another map) or some guesses, where should some cycle lanes go? - - From edit mode, you can name your proposal (from "Untitled Proposal"). It's saved in a file in the **data/player/edits/gb/leeds/north_leeds** folder. We can share/discuss ideas everybody comes up with later. 
-- Editing traffic signals - - Around 6:30 on the North Leeds map, there's some congestion near Sheepscar St and Meanwood Rd - - Click one of the traffic signals, then edit - - Try to improve the timing or synchronize multiple intersections - - Hint: you can edit multiple intersections at the same time - - If you like fiddling with traffic signals, go to the main menu, pick challenges, and try traffic signal survivor -- Evaluating an edited map - - Go to the main screen, then click Community Proposals - - Try the Lake Washington Blvd proposal - - Click on interesting roads and intersections (especially around the proposal) and explore the time-series data - - Explore the map layers (bottom right, near the minimap) -- particularly throughput, compared to before the proposal - - Explore the "more data" dashboards in the top-right corner - - Find some individual trips that became faster or slower due to this change, and figure out why - - Does the change make fast trips much slower? Does it affect trips that were longer to begin with? -- Gridlock - - Many problems cause the vehicles in a traffic simulation to get stuck, usually unrealistically - - Go try the weekday scenario in some Seattle maps -- Phinney Ride, Ballard, South Seattle - - What are some of the problems you can find? Which might be caused by bad data? Which look like simulation bugs? - -## 2 Import a new region (intermediate) 10 min - -This exercise requires familiarity with using the command line. Go follow https://a-b-street.github.io/docs/howto/new_city.html. If you downloaded the .zip, then you can run the importer tool on a .osm file. You may also need to get [osmconvert](https://wiki.openstreetmap.org/wiki/Osmconvert) running. - -Your goal is to import a city you're interested in. Reach out if you need help! 
- -## 3 abstr (advanced) 10 min - -Take a look at the documentation at https://a-b-street.github.io/abstr/ - -Install and load the package (and the tidyverse for data manipulation and tmap for mapping) as follows: - -```{r, eval=FALSE} -remotes::install_github("a-b-street/abstr") -``` - -```{r, message=FALSE} -library(abstr) -library(tidyverse) -library(tmap) -tmap_mode("view") -``` - -Reproduce the examples in the README and visualise only the resulting driving trips at the disaggregated (building to building) level. -The results should look like this: - -```{r, echo=FALSE} -od = leeds_od -zones = leeds_zones -od[[1]] = c("E02006876") -ablines = ab_scenario(od, zones = zones, origin_buildings = leeds_buildings) -plot(leeds_zones$geometry) -plot(leeds_buildings$geometry, add = TRUE) -plot(ablines["mode"], add = TRUE) -ablines_json = ab_json(ablines, scenario_name = "test") -od = leeds_desire_lines -names(od)[4:6] = c("Walk", "Bike", "Drive") -ablines = ab_scenario( - od = od, - zones = leeds_site_area, - zones_d = leeds_zones, - origin_buildings = leeds_houses, - destination_buildings = leeds_buildings, - output = "sf" -) -ablines_drive = ablines %>% - filter(mode == "Drive") -tm_shape(leeds_buildings, bbox = sf::st_bbox(ablines_drive)) + tm_polygons() + - tm_shape(leeds_houses) + tm_polygons(col = "blue") + - tm_shape(ablines_drive) + tm_lines(col = "red") -``` - - -Bonus: look-up the documentation for the function `stplanr::route()` and use that in combination with the function `stplanr::route_osrm()` to calculate driving routes associated with the desire lines shown above. - - -```{r} -ablines_drive_routes = stplanr::route( - l = ablines_drive, - route_fun = stplanr::route_osrm, - osrm.profile = "car" -) -ablines_drive_routes$n = 1 -ablines_rnet = stplanr::overline(ablines_drive_routes, "n") -tm_shape(ablines_rnet) + tm_lines(lwd = "n", scale = 9) -``` - -What is the mean average and standard deviation of driving route distances in that scenario? 
- -```{r, eval=FALSE, echo=FALSE} -mean(ablines_drive_routes$distance) -sd(ablines_drive_routes$distance) -``` - - -Which of those routes could be replaced by active modes? - - -Bonus (warning: very difficult): Generate an A/B Street scenario of your choice for the local authority of Hereford (or any other local authority in England and Wales) starting with this code. Note: this requires fast internet connection and decent computer: - -```{r, message=FALSE} -local_authority_name = "Herefordshire" -pct_region_name = pct::pct_regions_lookup %>% - filter(lad16nm == local_authority_name) %>% - pull(region_name) -zones_region = pct::get_pct_zones(region = pct_region_name, geography = "msoa") -table(zones_region$lad_name) -nrow(zones_region) -zones = zones_region %>% - filter(str_detect(string = lad_name, pattern = local_authority_name)) -nrow(zones) -plot(zones %>% select(bicycle:car_passenger)) -od_national = pct::get_od() -desire_lines = od::od_to_sf(od_national, z = zones) -qtm(zones, alpha = 0.3) + - tm_shape(desire_lines) + - tm_lines(lwd = "all", scale = 5) -``` - - - -## Working on the the exercises 20 min - - - - - diff --git a/messages/2022/seminar1-workshop.md b/messages/2022/seminar1-workshop.md deleted file mode 100644 index 3a1fdff..0000000 --- a/messages/2022/seminar1-workshop.md +++ /dev/null @@ -1,33 +0,0 @@ - -# TDS Seminar: From OD data to agent based modelling for car free futures - -13:00-13:10: Introduction to the session - -13:10-13:40: A/B Street Lecture - -13:40-13:55: Q&A - -13:55-14:00 Break - -14:00-15:00 Exercises - -15:00-15:30 Discussion of coursework ideas in breakout rooms - -## Preparation - -Please install A/B Street ahead of time: -You can't import new maps or follow along with all the exercises in the web version. If you hit problems, please contact . - -## Slides - -Find the slides at . 
(And feel free to use Github to discuss content, raise questions/ideas -- https://github.com/dabreegster/talks/tree/main/tds_seminar_synthpop) - -## Location - -The seminar will take place in the Parkinson Building, the tallest building in the University of Leeds (you cannot miss it!). -It's in Parkinson building room B.09. -We'll be having lunch in the Parkinson building cafe before so feel free to meet there at around 12:30 or in the room around 12:50 to get set-up before the session begins. - -Details on the room: https://students.leeds.ac.uk/rooms?type=room&id=100019 - -For livestream on Teams see [here](https://teams.microsoft.com/l/meetup-join/19%3ameeting_NDE2ZjQzNjQtM2M0YS00ZDFhLWFhN2ItYzA3YjA1YWFmOTNj%40thread.v2/0?context=%7b%22Tid%22%3a%22bdeaeda8-c81d-45ce-863e-5232a535b7cb%22%2c%22Oid%22%3a%22db432bda-89ab-48ff-8016-978914bd784a%22%7d) diff --git a/messages/2022/test.Rmd b/messages/2022/test.Rmd deleted file mode 100644 index 79f1d34..0000000 --- a/messages/2022/test.Rmd +++ /dev/null @@ -1,61 +0,0 @@ ---- -title: Test -output: bookdown::html_document2 -# output: -# html_document: -# toc: true -# toc_depth: 2 -# number_section: true -bibliography: references.bib ---- - -```{r, include=FALSE} -# load some packages -library(tidyverse) -library(stplanr) -library(tmap) -``` - -# Context - -This is some content. It uses the `tidyverse` package. -**This bold.** -We build on the the work of @anda2017transport. -This is another way of doing citations [@anda2017transport]. - -Here is another one [@peyre2019computational]. - -```{r, eval=FALSE} -# To generate this result I used the following code -source("data-processing.R") -``` - - - -```{r, eval=FALSE, echo=FALSE} -zones = stplanr::zones_sf -map1 = tm_shape(zones) + - tm_polygons() -tmap_save(map1, "map1.png") -``` - -See Figure \@ref(fig:leeds) below. 
- -```{r leeds, echo=FALSE, fig.cap="This is a map of Leeds."} -knitr::include_graphics("map1.png") -``` - - - - - -```{r} -1 + 1 -``` - -# Introduction - - - - -# References diff --git a/messages/2022/welcome.Rmd b/messages/2022/welcome.Rmd deleted file mode 100644 index 3850429..0000000 --- a/messages/2022/welcome.Rmd +++ /dev/null @@ -1,84 +0,0 @@ ---- -bibliography: ../tds.bib -output: github_document ---- - - - -```{r, include=FALSE} -library(tidyverse) -``` - - -Dear Students and aspiring Data Scientists, - -## Welcome - -Welcome to Transport Data Science. -In this module you will learn data science skills and how to apply them to solve real world problems, with a focus on transport planning. -Transport is by definition a geographic phenomenon, meaning the movement from one place to another: -"the purpose of transportation is to overcome space" [@rodrigue_geography_2013]. - -Becoming good at transport data science therefore means becoming good at data science in general and geographic data science in particular. -2021 is an exciting time to be learning data science for transport applications. -A wide range of new and open source tools, combined with unprecedented amounts of new data, enable researchers to visualise/model/understand transport systems in more breadth and depth than ever before [@lovelace_open_2021]. - -## About the course - -Based on an understanding of the evolving nature of data science as a collaborative and open source code-driven enterprise, this module takes a 'learning by doing' approach. 
-The contact time will consist of - -- 5 * one hour lectures that will be released as videos at the beginning of each week (starting from next week) -- 5 * 3 hour practicals in which we will work through data science problems and solutions together -- A seminar consisting of an hour of an external speaker talking about transport data science used in the wild -- most importantly, you will dedicate substantial time (several hours per week) to practicing data science, resulting in a reproducible project portfolio in which you will demonstrate your data science skills - -The module timetable is shown in the table below. - -```{r, message=FALSE, echo=FALSE} -timetable = read_csv("../timetable.csv") -# timetable = read_csv("timetable.csv") -timetable %>% - select(-description) %>% - rename_with(str_to_title) %>% - knitr::kable() -``` - - -You will need to work hard to become a proficient data scientist in the transport domain. -The work presented in the table above will provide you with the foundation you need to progress. -But you will need to bring a vital ingredient to the course: passion. - -## Homework - -The first thing you should do is ensure that you have the timetable stored safely in your calendar, so you do not miss important practicals or seminars. -You can watch the lectures in your own time but you *must* ensure you have watched each one and taken notes before each practical session on Thursdays. - -The second thing you should do is ensure that you have the necessary software installed on your computer and that you have tested that you can use it for the datasets we will be using in the course. -**You must read and try to complete the exercises in Chapters 1 to 4 of the book Reproducible Road Safety Research with R.** -Most important section of those 4 chapters is Section 1.5, which explains how to install R and RStudio. 
-This is not a theoretical section: I suggest you follow the instructions here ASAP: https://itsleeds.github.io/rrsrr/introduction.html#installing-r-and-rstudio - -Other key chapters in that resources are: - -- Chapter 2 on the basics of the statistical programming language R: https://itsleeds.github.io/rrsrr/basics.html -- Chapter 3 on using RStudio: https://itsleeds.github.io/rrsrr/rstudio.html -- Chapter 4 on installing R packages, **make sure you can install packages such as sf**: https://itsleeds.github.io/rrsrr/pkgs.html - -The third and final thing to do is to *engage* with the reading content and platforms associated with this module. -See the reading list associated with the module at https://github.com/ITSLeeds/TDS/blob/master/catalogue.md#reading-list. -Read-up on the references and links provided in the References section below. -Check out the course's online home on the GitHub platform at https://github.com/ITSLeeds/TDS. -(Fun fact, you can find the source code underlying this message [here](https://github.com/ITSLeeds/TDS/blob/master/messages/welcome.Rmd).) -To collaborate on GitHub you should sign-up to get a GitHub account here: https://github.com/ -After you have a GitHub account you can ask questions by opening an 'Issue' in the project's repository, a common way for professional data scientists to communicate about code and their work: https://github.com/ITSLeeds/TDS/issues/ -And ensure you can access the Transport Data Science Team on Microsoft Teams and please do say hello, as shown below. - -![](https://user-images.githubusercontent.com/1825120/106159315-8f1fd880-617c-11eb-91be-1a6a123082b2.png) - -And please do ask any questions do ask, either via email or preferably on Teams or on a GitHub issue. It’s always better ask than to ‘suffer in silence!’ - -I really look forward to working with you all to make this a successful and enjoyable module. 
- -# References - diff --git a/messages/2022/welcome.md b/messages/2022/welcome.md deleted file mode 100644 index 374671b..0000000 --- a/messages/2022/welcome.md +++ /dev/null @@ -1,135 +0,0 @@ - - - -Dear Students and aspiring Data Scientists, - -## Welcome - -Welcome to Transport Data Science. In this module you will learn data -science skills and how to apply them to solve real world problems, with -a focus on transport planning. Transport is by definition a geographic -phenomenon, meaning the movement from one place to another: “the purpose -of transportation is to overcome space” (Rodrigue, Comtois, and Slack -2013). - -Becoming good at transport data science therefore means becoming good at -data science in general and geographic data science in particular. 2021 -is an exciting time to be learning data science for transport -applications. A wide range of new and open source tools, combined with -unprecedented amounts of new data, enable researchers to -visualise/model/understand transport systems in more breadth and depth -than ever before (Lovelace 2021). - -## About the course - -Based on an understanding of the evolving nature of data science as a -collaborative and open source code-driven enterprise, this module takes -a ‘learning by doing’ approach. The contact time will consist of - -- 5 \* one hour lectures that will be released as videos at the - beginning of each week (starting from next week) -- 5 \* 3 hour practicals in which we will work through data science - problems and solutions together -- A seminar consisting of an hour of an external speaker talking about - transport data science used in the wild -- most importantly, you will dedicate substantial time (several hours - per week) to practicing data science, resulting in a reproducible - project portfolio in which you will demonstrate your data science - skills - -The module timetable is shown in the table below. 
- -| Summary | Date | Duration | -|:--------------------------------------------------|:-----------|---------:| -| TDS Lecture 1 | 2022-01-24 | 1 | -| TDS Lecture 2: The structure of transport data | 2022-01-31 | 1 | -| TDS Practical 1 | 2022-02-03 | 3 | -| Lecture 3: Routing | 2022-02-07 | 1 | -| TDS Practical 2: Routing | 2022-02-10 | 3 | -| TDS seminar | 2022-02-16 | 3 | -| TDS Practical 3: Data from Web Sources | 2022-02-17 | 3 | -| TDS Practical 4: OD and route data analysis | 2022-02-24 | 3 | -| TDS Lecture 4: Visualising Transport Data | 2022-03-14 | 1 | -| TDS Deadline 1: 2 page outline of project | 2022-03-18 | 1 | -| TDS Lecture 5: Transport Data Science in Practice | 2022-03-21 | 1 | -| TDS Practical 5: Visualisation and project work | 2022-04-28 | 3 | -| TDS Deadline 2: 10 page project report | 2022-05-13 | 1 | - -You will need to work hard to become a proficient data scientist in the -transport domain. The work presented in the table above will provide you -with the foundation you need to progress. But you will need to bring a -vital ingredient to the course: passion. - -## Homework - -The first thing you should do is ensure that you have the timetable -stored safely in your calendar, so you do not miss important practicals -or seminars. You can watch the lectures in your own time but you *must* -ensure you have watched each one and taken notes before each practical -session on Thursdays. - -The second thing you should do is ensure that you have the necessary -software installed on your computer and that you have tested that you -can use it for the datasets we will be using in the course. **You must -read and try to complete the exercises in Chapters 1 to 4 of the book -Reproducible Road Safety Research with R.** Most important section of -those 4 chapters is Section 1.5, which explains how to install R and -RStudio. 
This is not a theoretical section: I suggest you follow the -instructions here ASAP: - - -Other key chapters in that resources are: - -- Chapter 2 on the basics of the statistical programming language R: - -- Chapter 3 on using RStudio: - -- Chapter 4 on installing R packages, **make sure you can install - packages such as sf**: - -The third and final thing to do is to *engage* with the reading content -and platforms associated with this module. See the reading list -associated with the module at -. -Read-up on the references and links provided in the References section -below. Check out the course’s online home on the GitHub platform at -. (Fun fact, you can find the source -code underlying this message -[here](https://github.com/ITSLeeds/TDS/blob/master/messages/welcome.Rmd).) -To collaborate on GitHub you should sign-up to get a GitHub account -here: After you have a GitHub account you can ask -questions by opening an ‘Issue’ in the project’s repository, a common -way for professional data scientists to communicate about code and their -work: And ensure you can -access the Transport Data Science Team on Microsoft Teams and please do -say hello, as shown below. - -![](https://user-images.githubusercontent.com/1825120/106159315-8f1fd880-617c-11eb-91be-1a6a123082b2.png) - -And please do ask any questions do ask, either via email or preferably -on Teams or on a GitHub issue. It’s always better ask than to ‘suffer in -silence!’ - -I really look forward to working with you all to make this a successful -and enjoyable module. - -# References - -
- -
- -Lovelace, Robin. 2021. “Open Source Tools for Geographic Analysis in -Transport Planning.” *Journal of Geographical Systems*, January. -. - -
- -
- -Rodrigue, Jean-Paul, Claude Comtois, and Brian Slack. 2013. *The -Geography of Transport Systems*. Third. London, New York: Routledge. - -
- -
diff --git a/messages/2023/lecture_1_welcome.Rmd b/messages/2023/lecture_1_welcome.Rmd deleted file mode 100644 index cb2f53d..0000000 --- a/messages/2023/lecture_1_welcome.Rmd +++ /dev/null @@ -1,91 +0,0 @@ ---- -bibliography: ../tds.bib -output: github_document ---- - - - -```{r, include=FALSE} -library(tidyverse) -``` - -Dear Students and Data Scientists, - -# First lecture - -As per your [timetable](http://timetable.leeds.ac.uk/teaching/202223/reporting/Individual?objectclass=module&idtype=name&identifier=TRAN5340M01&&template=SWSCUST+module+Individual&days=1-7&weeks=1-52&periods=1-21), the first lecture is tomorrow, Monday 30th January, from 11:00 to 12:00. - -**Location:** [Lecture Theatre (LT) B (3.23)](https://students.leeds.ac.uk/rooms#building=Civil-Engineering&room=//students.leeds.ac.uk/rooms?type=room&id=99885), [Civil Engineering Building](https://students.leeds.ac.uk/rooms#building=Civil-Engineering). - -Instructions on how to get there: https://students.leeds.ac.uk/rooms#building=Civil-Engineering&room=//students.leeds.ac.uk/rooms?type=room&id=99885 - -See below for the location: - -![](https://user-images.githubusercontent.com/1825120/215348802-f065cda3-9770-404d-804a-3e8ed290e343.png) - -Please ensure that you arrive in good time, by 10:55, so you have time to get a seat in time for the 11:00 start. - -## Welcome - -Welcome to Transport Data Science. -In this module you will learn data science skills and how to apply them to solve real world problems, with a focus on transport planning. -Transport is by definition a geographic phenomenon, meaning the movement from one place to another: "the purpose of transportation is to overcome space" [@rodrigue_geography_2013]. - -Becoming good at transport data science therefore means becoming good at data science in general and geographic data science in particular. -2021 is an exciting time to be learning data science for transport applications. 
-A wide range of new and open source tools, combined with unprecedented amounts of new data, enable researchers to visualise/model/understand transport systems in more breadth and depth than ever before [@lovelace_open_2021]. - -## About the course - -Based on an understanding of the evolving nature of data science as a collaborative and open source code-driven enterprise, this module takes a 'learning by doing' approach. -The contact time will consist of - -- 5 \* one hour lectures that will be released as videos at the beginning of each week (starting from next week) -- 5 \* 2.5 hour practicals in which we will work through data science problems and solutions together -- A seminar consisting of an hour of an external speaker talking about transport data science used in the wild -- most importantly, you will dedicate substantial time (several hours per week) to practicing data science, resulting in a reproducible project portfolio in which you will demonstrate your data science skills - -The module timetable is shown in the table below. - -```{r, message=FALSE, echo=FALSE} -timetable = read_csv("../timetable.csv") -# timetable = read_csv("timetable.csv") -timetable %>% - select(-description) %>% - rename_with(str_to_title) %>% - knitr::kable() -``` - -You will need to work hard to become a proficient data scientist in the transport domain. -The work presented in the table above will provide you with the foundation you need to progress. -But you will need to bring a vital ingredient to the course: passion. - -## Homework for next week - -The first thing you should do is ensure that you have the timetable stored safely in your calendar, so you do not miss important practicals or seminars. - - -The second thing you should do is ensure that you have the necessary software installed on your computer and that you have tested that you can use it for the datasets we will be using in the course. 
-**You must read and try to complete the exercises in Chapters 1 to 4 of the book Reproducible Road Safety Research with R.** Most important section of those 4 chapters is Section 1.5, which explains how to install R and RStudio. -This is not a theoretical section: I suggest you follow the instructions here ASAP: - -Other key chapters in that resources are: - -- Chapter 2 on the basics of the statistical programming language R: -- Chapter 3 on using RStudio: -- Chapter 4 on installing R packages, **make sure you can install packages such as sf**: - -The third and final thing to do is to *engage* with the reading content and platforms associated with this module. -See the reading list associated with the module at . -Read-up on the references and links provided in the References section below. -Check out the course's online home on the GitHub platform at . -(Fun fact, you can find the source code underlying this message [here](https://github.com/ITSLeeds/TDS/blob/master/messages/welcome.Rmd).) To collaborate on GitHub you should sign-up to get a GitHub account here: After you have a GitHub account you can ask questions by opening an 'Issue' in the project's repository, a common way for professional data scientists to communicate about code and their work: And ensure you can access the Transport Data Science Team on Microsoft Teams and please do say hello, as shown below. - - - -And please do ask any questions do ask, either via email or preferably on [Minerva](https://minerva.leeds.ac.uk/ultra/courses/_542966_1/outline) or via a GitHub issue as described above. -It's always better ask than to 'suffer in silence!' - -I really look forward to working with you all to make this a successful and enjoyable module. 
- -# References diff --git a/messages/2023/lecture_1_welcome.md b/messages/2023/lecture_1_welcome.md deleted file mode 100644 index e4c9245..0000000 --- a/messages/2023/lecture_1_welcome.md +++ /dev/null @@ -1,159 +0,0 @@ - - - -Dear Students and Data Scientists, - -# First lecture - -As per your -[timetable](http://timetable.leeds.ac.uk/teaching/202223/reporting/Individual?objectclass=module&idtype=name&identifier=TRAN5340M01&&template=SWSCUST+module+Individual&days=1-7&weeks=1-52&periods=1-21), -the first lecture is tomorrow, Monday 30th January, from 11:00 to 12:00. - -**Location:** [Lecture Theatre (LT) B -(3.23)](https://students.leeds.ac.uk/rooms#building=Civil-Engineering&room=//students.leeds.ac.uk/rooms?type=room&id=99885), -[Civil Engineering -Building](https://students.leeds.ac.uk/rooms#building=Civil-Engineering). - -Instructions on how to get there: - - -See below for the location: - -![](https://user-images.githubusercontent.com/1825120/215348802-f065cda3-9770-404d-804a-3e8ed290e343.png) - -Please ensure that you arrive in good time, by 10:55, so you have time -to get a seat in time for the 11:00 start. - -## Welcome - -Welcome to Transport Data Science. In this module you will learn data -science skills and how to apply them to solve real world problems, with -a focus on transport planning. Transport is by definition a geographic -phenomenon, meaning the movement from one place to another: “the purpose -of transportation is to overcome space” (Rodrigue, Comtois, and Slack -2013). - -Becoming good at transport data science therefore means becoming good at -data science in general and geographic data science in particular. 2021 -is an exciting time to be learning data science for transport -applications. A wide range of new and open source tools, combined with -unprecedented amounts of new data, enable researchers to -visualise/model/understand transport systems in more breadth and depth -than ever before (Lovelace 2021). 
- -## About the course - -Based on an understanding of the evolving nature of data science as a -collaborative and open source code-driven enterprise, this module takes -a ‘learning by doing’ approach. The contact time will consist of - -- 5 \* one hour lectures that will be released as videos at the - beginning of each week (starting from next week) -- 5 \* 2.5 hour practicals in which we will work through data science - problems and solutions together -- A seminar consisting of an hour of an external speaker talking about - transport data science used in the wild -- most importantly, you will dedicate substantial time (several hours - per week) to practicing data science, resulting in a reproducible - project portfolio in which you will demonstrate your data science - skills - -The module timetable is shown in the table below. - -| Summary | Date | Duration | Location | -|:---------------------------|:-----------|---------:|:-------------------------------------| -| TDS Lecture 1: structure | 2023-01-30 | 60 | Civil Engineering LT B (3.25) | -| TDS deadline 1 | 2023-02-03 | 1 | Online - Teams | -| TDS Lecture 2: od | 2023-02-06 | 60 | Civil Engineering LT B (3.25) | -| TDS Practical 1: structure | 2023-02-09 | 150 | West Teaching Lab Cluster (G.29) | -| TDS Lecture 3: routing | 2023-02-13 | 60 | Civil Engineering LT B (3.25) | -| TDS Practical 2: routing | 2023-02-16 | 150 | West Teaching Lab Cluster (G.29) | -| TDS Practical 3: od | 2023-02-23 | 150 | West Teaching Lab Cluster (G.29) | -| TDS seminar 1 | 2023-02-23 | 120 | Institute for Transport Studies 1.11 | -| TDS deadline 2 | 2023-02-24 | 1 | Online - Teams | -| TDS Practical 4: getting | 2023-03-02 | 150 | West Teaching Lab Cluster (G.29) | -| TDS seminar 2 | 2023-03-08 | 120 | Institute for Transport Studies 1.11 | -| TDS Lecture 4: viz | 2023-03-20 | 60 | Civil Engineering LT B (3.25) | -| TDS Lecture 5: project | 2023-03-27 | 60 | Civil Engineering LT B (3.25) | -| TDS Practical 5: project | 2023-05-04 
| 150 | West Teaching Lab Cluster (G.29) | -| TDS deadline 3 | 2023-05-19 | 1 | Online - Teams | - -You will need to work hard to become a proficient data scientist in the -transport domain. The work presented in the table above will provide you -with the foundation you need to progress. But you will need to bring a -vital ingredient to the course: passion. - -## Homework for next week - -The first thing you should do is ensure that you have the timetable -stored safely in your calendar, so you do not miss important practicals -or seminars. - - -The second thing you should do is ensure that you have the necessary -software installed on your computer and that you have tested that you -can use it for the datasets we will be using in the course. **You must -read and try to complete the exercises in Chapters 1 to 4 of the book -Reproducible Road Safety Research with R.** Most important section of -those 4 chapters is Section 1.5, which explains how to install R and -RStudio. This is not a theoretical section: I suggest you follow the -instructions here ASAP: - - -Other key chapters in that resources are: - -- Chapter 2 on the basics of the statistical programming language R: - -- Chapter 3 on using RStudio: - -- Chapter 4 on installing R packages, **make sure you can install - packages such as sf**: - -The third and final thing to do is to *engage* with the reading content -and platforms associated with this module. See the reading list -associated with the module at -. -Read-up on the references and links provided in the References section -below. Check out the course’s online home on the GitHub platform at -. (Fun fact, you can find the source -code underlying this message -[here](https://github.com/ITSLeeds/TDS/blob/master/messages/welcome.Rmd).) 
-To collaborate on GitHub you should sign-up to get a GitHub account -here: After you have a GitHub account you can ask -questions by opening an ‘Issue’ in the project’s repository, a common -way for professional data scientists to communicate about code and their -work: And ensure you can -access the Transport Data Science Team on Microsoft Teams and please do -say hello, as shown below. - - - -And please do ask any questions do ask, either via email or preferably -on -[Minerva](https://minerva.leeds.ac.uk/ultra/courses/_542966_1/outline) -or via a GitHub issue as described above. It’s always better ask than to -‘suffer in silence!’ - -I really look forward to working with you all to make this a successful -and enjoyable module. - -# References - -
- -
- -Lovelace, Robin. 2021. “Open Source Tools for Geographic Analysis in -Transport Planning.” *Journal of Geographical Systems*, January. -. - -
- -
- -Rodrigue, Jean-Paul, Claude Comtois, and Brian Slack. 2013. *The -Geography of Transport Systems*. Third. London, New York: Routledge. - -
- -
diff --git a/messages/2023/references.bib b/messages/2023/references.bib deleted file mode 100644 index db22c89..0000000 --- a/messages/2023/references.bib +++ /dev/null @@ -1,22 +0,0 @@ -@article{anda2017transport, - title={Transport modelling in the age of big data}, - author={Anda, Cuauhtemoc and Erath, Alexander and Fourie, Pieter Jacobus}, - journal={International Journal of Urban Sciences}, - volume={21}, - number={sup1}, - pages={19--42}, - year={2017}, - publisher={Taylor \& Francis} -} - -@article{peyre2019computational, - title={Computational optimal transport: With applications to data science}, - author={Peyr{\'e}, Gabriel and Cuturi, Marco and others}, - journal={Foundations and Trends{\textregistered} in Machine Learning}, - volume={11}, - number={5-6}, - pages={355--607}, - year={2019}, - publisher={Now Publishers, Inc.} -} - diff --git a/messages/2023/seminar-workshop.Rmd b/messages/2023/seminar-workshop.Rmd deleted file mode 100644 index 26829de..0000000 --- a/messages/2023/seminar-workshop.Rmd +++ /dev/null @@ -1,60 +0,0 @@ ---- -output: github_document ---- - - -# Seminar 2: Geographic data in the transport planning industry - -This seminar will be in 2 parts - -## Part 1: seminar - -- Talk by Qian Fu and questions - -## Part 2: coursework - -### Option 1: working throught the coursework - -- Talk over updated dissertation guidance, available here: https://github.com/ITSLeeds/TDS/blob/master/coursework-template.md#coursework-guidance ~10 min - -- Questions from the group about the coursework ~ 5 m - -- Practical work: using RMarkdown to write a coursework topic and technical questions - see https://bookdown.org/yihui/rmarkdown-cookbook/ - 15 min - -### Option 2: accessing OSM data with pydriosm - -Try installing the package and follow some of the online documentation: https://colab.research.google.com/drive/11trWfJZ_opp2r4GiksRg19MhFgAsoR3A#scrollTo=eCspbh5wuWWB - -```bash -docker run -it ghcr.io/geocompx/docker:mamba /bin/bash -micromamba 
install pydriosm -micromamba install osm2rail -pip install pydriosm - -``` - -### Option 3: Getting rail data with R - -```{r, eval=FALSE} -library(osmextract) -# ?oe_get -# example from help: "SELECT * FROM 'lines' WHERE oneway == 'yes'" - -q = "SELECT * FROM 'lines' WHERE railway == 'rail'" -# Try for a small area: -railways_iow = oe_get(place = "isle of wight", query = q, extra_tags = "railway") -nrow(railways_iow) -plot(railways_iow$geometry) -railways_england = oe_get(place = "England", query = q, extra_tags = "railway") -plot(railways_england$geometry) -``` - - -```{python} - -``` - - - - - diff --git a/messages/2023/seminar-workshop.md b/messages/2023/seminar-workshop.md deleted file mode 100644 index 3f9ef75..0000000 --- a/messages/2023/seminar-workshop.md +++ /dev/null @@ -1,50 +0,0 @@ - -# Seminar 2: Geographic data in the transport planning industry - -This seminar will be in 2 parts - -## Part 1: seminar - -- Talk by Qian Fu and questions - -## Part 2: coursework - -### Option 1: working throught the coursework - -- Talk over updated dissertation guidance, available here: - - ~10 min - -- Questions from the group about the coursework ~ 5 m - -- Practical work: using RMarkdown to write a coursework topic and - technical questions - see - - 15 min - -### Option 2: accessing OSM data with pydriosm - -Try installing the package and follow some of the online documentation: - - -``` bash -docker run -it ghcr.io/geocompx/docker:mamba /bin/bash -micromamba install pydriosm -micromamba install osm2rail -pip install pydriosm -``` - -### Option 3: Getting rail data with R - -``` r -library(osmextract) -# ?oe_get -# example from help: "SELECT * FROM 'lines' WHERE oneway == 'yes'" - -q = "SELECT * FROM 'lines' WHERE railway == 'rail'" -# Try for a small area: -railways_iow = oe_get(place = "isle of wight", query = q, extra_tags = "railway") -nrow(railways_iow) -plot(railways_iow$geometry) -railways_england = oe_get(place = "England", query = q, extra_tags = 
"railway") -plot(railways_england$geometry) -``` diff --git a/messages/ai-guidance.md b/messages/ai-guidance.md deleted file mode 100644 index e28490a..0000000 --- a/messages/ai-guidance.md +++ /dev/null @@ -1,66 +0,0 @@ - - -# Message sent in 2024 on use of AI in the module - -Hi all, - -This is just a message to say that the assessment has been categorised -as “amber” in terms of AI, as highlighted in the attached coversheet (in -docx and pdf format, and in the coursework template which can be found -at https://github.com/ITSLeeds/TDS/blob/master/coursework-template.Rmd -and a PDF version of which can be found attached). - -Using the template should save you time: no need to combine PDFs because -the coversheet is already included in the template. Note: the 10 page -limit is for the main body of the report, and does not include the -coversheet, references, or appendices. - -Please do ensure that you are progressing on your coursework: you should -be adding content to your .Rmd files and then knitting them to PDFs to -check that they look as you expect. Do not leave this to the last -minute, as it can be time-consuming to fix issues with the formatting. - -Any questions, just let me know via email, on Teams, or on GitHub. - -All the best, - -Robin - -P.s. see below the details of the categorisation of the assessment as -“amber” in terms of AI. - -Under this the amber category, AI tools can be used in this assessment -in an assistive role for the specifically defined processes.   - -In this assessment, AI tools CAN be utilised in an assistive role to:   - -- Act as a support tutor to aid in the research of a topic. -- Testing and debugging of any code you produce yourself as part of the - assignment.  -- Draft and structure your piece of work once you have worked through - the problem.  -- Provide ideas or inspiration to help you overcome a creative block.  -- Give feedback on content or provide proof reading of content that you - have generated yourself.  
- -In this assessment, AI tools CANNOT be utilised to:   - -- For any other functions beyond those listed above. In particular, you - must not use Gen AI to produce any of the final text, or other - content, that you submit for assessment. - -The use of Generative AI must be acknowledged in an ‘Acknowledgements’ -section of any piece of academic work where it has been used as a -functional tool to assist in the process of creating academic work. The -minimum requirement to include in acknowledgement:   - -- Name and version of the generative AI system used (e.g. ChatGPT-4.0) -- The publisher/company that produced the Gen AI used (e.g. Open AI) -- URL of the AI system  -- Brief description (single sentence) of context in which the tool was - used.  - -The standard Academic Misconduct procedure applies for students believed -to have ignored this categorisation. For detailed guidance see -https://generative-ai.leeds.ac.uk/ai-and -assessments/categories-of-assessments/  diff --git a/messages/ai-guidance.qmd b/messages/ai-guidance.qmd deleted file mode 100644 index d8b45b8..0000000 --- a/messages/ai-guidance.qmd +++ /dev/null @@ -1,47 +0,0 @@ ---- -format: gfm ---- - -# Message sent in 2024 on use of AI in the module - - -Hi all, - -This is just a message to say that the assessment has been categorised as "amber" in terms of AI, as highlighted in the attached coversheet (in docx and pdf format, and in the coursework template which can be found at https://github.com/ITSLeeds/TDS/blob/master/coursework-template.Rmd and a PDF version of which can be found attached). - -Using the template should save you time: no need to combine PDFs because the coversheet is already included in the template. Note: the 10 page limit is for the main body of the report, and does not include the coversheet, references, or appendices. 
- -Please do ensure that you are progressing on your coursework: you should be adding content to your .Rmd files and then knitting them to PDFs to check that they look as you expect. Do not leave this to the last minute, as it can be time-consuming to fix issues with the formatting. - -Any questions, just let me know via email, on Teams, or on GitHub. - -All the best, - -Robin - -P.s. see below the details of the categorisation of the assessment as "amber" in terms of AI. - -Under this the amber category, AI tools can be used in this assessment in an assistive role for the specifically defined processes.   - -In this assessment, AI tools CAN be utilised in an assistive role to:   - -- Act as a support tutor to aid in the research of a topic. -- Testing and debugging of any code you produce yourself as part of the assignment.  -- Draft and structure your piece of work once you have worked through the problem.  -- Provide ideas or inspiration to help you overcome a creative block.  -- Give feedback on content or provide proof reading of content that you have generated yourself.  - -In this assessment, AI tools CANNOT be utilised to:   - -- For any other functions beyond those listed above. In particular, you must not use Gen AI to produce any of the final text, or other content, that you submit for assessment. - -The use of Generative AI must be acknowledged in an ‘Acknowledgements’ section of any piece of academic work where it has been used as a functional tool to assist in the process of creating academic work. The minimum requirement to include in acknowledgement:   - -- Name and version of the generative AI system used (e.g. ChatGPT-4.0) -- The publisher/company that produced the Gen AI used (e.g. Open AI) -- URL of the AI system  -- Brief description (single sentence) of context in which the tool was used.  - -The standard Academic Misconduct procedure applies for students believed to have ignored this categorisation. 
For detailed guidance see https://generative-ai.leeds.ac.uk/ai-and assessments/categories-of-assessments/  - - diff --git a/messages/welcome.md b/messages/welcome.md deleted file mode 100644 index af6b61a..0000000 --- a/messages/welcome.md +++ /dev/null @@ -1,176 +0,0 @@ - - - - -Dear Students, - -## Welcome - -Welcome to Transport Data Science. In this module you will learn data -science skills and how to apply them to solve real world problems, with -a focus on transport planning. Transport is by definition a geographic -phenomenon, meaning the movement from one place to another: “the purpose -of transportation is to overcome space” (Rodrigue, Comtois, and Slack -2013). - -Becoming good at transport data science therefore means becoming good at -data science in general and geographic data science in particular -(Lovelace, Nowosad, and Muenchow 2019). 2024 is an exciting time to be -learning data science for transport applications. A wide range of new -and open source tools, combined with unprecedented amounts of new data, -enable researchers to visualise/model/understand transport systems in -more breadth and depth than ever before. - -## About the course - -Based on an understanding of the evolving nature of data science as a -collaborative and open source code-driven enterprise, this module takes -a ‘learning by doing’ approach, with the content delivered as - -- 6 \* 3 hour practicals -- 2 \* 1 hour seminars, followed by practical sessions -- feedback on your work - -See the module timetable at -https://mytimetable.leeds.ac.uk/link?timetable.id=202324!module!D5179CB14D503D52757F4BE89B1C998B -and in the table here: -https://github.com/ITSLeeds/TDS#tds-transport-data-science - -You will need to work hard to become a proficient data scientist in the -transport domain. The work will provide you with the foundation you need -to progress. - -## Homework - -You need to do three things before the first session on Thursday 8th -February at 09:00: - -### 1. 
Install and test the software - -The course is taught with R and RStudio so you need to have these -installed on your computer before the first practical session. Follow -the instructions in the book Reproducible Road Safety Research with R, -which you can find here: -https://itsleeds.github.io/rrsrr/introduction.html#installing-r-and-rstudio - -If you learn better by watching videos, you can watch this video: -https://www.youtube.com/watch?v =YrEe2TLr3MI - -#### Can I use University of Leeds computers? - -The University of Leeds has RStudio on computers installed. However, we -strongly recommend that you install R and RStudio on your own computer -so that you can work on the course material outside of the practical -sessions. The RStudio installations on University of Leeds computers -have not recently been tested and in the past have caused problems for -students. - -#### Can I use a different IDE? - -You can use a different IDE (Integrated Development Environment) to -RStudio if you prefer (VS Code is the only other IDE I would recommend), -but you will need to be able to install R packages and run R scripts. If -you use VS Code, you will need to install the R extension for VS Code: -https://marketplace.visualstudio.com/items?i temName=Ikuyadeu.r I -recommend taking a read of this guide if you want to try out VS Code for -data science: https://github.com/RamiKrispin/vscode-r You can also use -Python, but you will need to be able to install Python packages and run -Python scripts. We strongly recommend using Quarto with VS Code for R, -Python and other languages, see here for more details: -https://quarto.org/docs/tools/vscode.html - -### 2. Read and try to complete the exercises in Chapters 1 to 4 of the book Reproducible Road Safety Research with R - -The second thing you should do is ensure that you have the necessary -software installed on your computer and that you have tested that you -can use it for the datasets we will be using in the course. 
**You must -read and try to complete the exercises in Chapters 1 to 4 of the book -Reproducible Road Safety Research with R.** Most important section of -those 4 chapters is Section 1.5, which explains how to install R and -RStudio. This is not a theoretical section: I suggest you follow the -instructions here ASAP: -https://itsleeds.github.io/rrsrr/introduction.html#installing-r-and-rstudio - -Other key chapters in that resources are: - -- Chapter 2 on the basics of the statistical programming language R: - https://itsleeds.github.io/rrsrr/basics.html -- Chapter 3 on using RStudio: - https://itsleeds.github.io/rrsrr/rstudio.html -- Chapter 4 on installing R packages, **make sure you can install - packages such as sf**: https://itsleeds.github.io/rrsrr/pkgs.html - -### 3. Get set-up on GitHub and Microsoft Teams - -Teams and GitHub will be the main platforms we use for communication and -collaboration. Ensure you can access them as soon as possible. - -- Sign-up to get a GitHub account here: https://github.com/ -- Ensure you can access the Transport Data Science Team on Microsoft - Teams and please do say hello via this link: - https://teams.microsoft.com/l/channel/19%3a-IqQ19KOCzVzkbPWnQy8d4dUq0MspiN6Dv4BJYMr-ck1%40thread.tacv2/General?groupId=0e661005-2ad9-4aa2-a466-b3a1afa728c1&tenantId=bdeaeda8-c81d-45ce-863e-5232a535b7cb -- See the reading list associated with the module at - https://github.com/ITSLeeds/TDS/blob/master/catalogue.md#reading-list. -- Ensure you can access the module page on Minerva: - https://minerva.leeds.ac.uk/ultra/courses/\_551386_1/outline -- Answer the questionnaire on the Minerva module page (allow a few hours - to do this because it asks you to complete some tasks while you answer - the questions): **without completing this we cannot add you to the - course on GitHub.** - -## Contact - -If you have any questions, please do not hesitate to contact me via -email. My office hours are 10:00 to 12:00 on Mondays. 
- -I really look forward to working with you all to make this a successful -and enjoyable module. - -## Questionnaire - -Please complete the questionnaire on the Minerva module page: **without -completing this we cannot add you to the course on GitHub.** This -questionnaire assumes you have read and acted on the information in this -welcome message: -https://github.com/ITSLeeds/TDS/blob/master/messages/welcome.md - -1. Have you installed R and RStudio on your computer, following the - instructions in the welcome message? - -2. Have you read and tried to complete the exercises in Chapters 1 to 4 - of the book Reproducible Road Safety Research with R? - -3. Have you signed up to GitHub? - -4. What is your GitHub username? - -5. Have you joined the Transport Data Science Team on Microsoft Teams? - -6. Have you got experience with R? - -7. Have you got experience with Python? - -8. Have you got experience with geographic data? - -9. What would you most like to get out of this course? - -# References - -
- -
- -Lovelace, Robin, Jakub Nowosad, and Jannes Muenchow. 2019. -*Geocomputation with R*. CRC Press. - -
- -
- -Rodrigue, Jean-Paul, Claude Comtois, and Brian Slack. 2013. *The -Geography of Transport Systems*. Third. London, New York: Routledge. - -
- -
diff --git a/messages/welcome.qmd b/messages/welcome.qmd deleted file mode 100644 index c316803..0000000 --- a/messages/welcome.qmd +++ /dev/null @@ -1,115 +0,0 @@ ---- -bibliography: ../tds.bib -format: gfm ---- - - - -```{r, include=FALSE} -library(tidyverse) -``` - -Dear Students, - -## Welcome - -Welcome to Transport Data Science. -In this module you will learn data science skills and how to apply them to solve real world problems, with a focus on transport planning. -Transport is by definition a geographic phenomenon, meaning the movement from one place to another: "the purpose of transportation is to overcome space" [@rodrigue_geography_2013]. - -Becoming good at transport data science therefore means becoming good at data science in general and geographic data science in particular [@lovelace_geocomputation_2019]. -2024 is an exciting time to be learning data science for transport applications. -A wide range of new and open source tools, combined with unprecedented amounts of new data, enable researchers to visualise/model/understand transport systems in more breadth and depth than ever before. - -## About the course - -Based on an understanding of the evolving nature of data science as a collaborative and open source code-driven enterprise, this module takes a 'learning by doing' approach, with the content delivered as - -- 6 \* 3 hour practicals -- 2 \* 1 hour seminars, followed by practical sessions -- feedback on your work - -See the module timetable at https://mytimetable.leeds.ac.uk/link?timetable.id=202324!module!D5179CB14D503D52757F4BE89B1C998B and in the table here: https://github.com/ITSLeeds/TDS#tds-transport-data-science - -You will need to work hard to become a proficient data scientist in the transport domain. -The work will provide you with the foundation you need to progress. - -## Homework - -You need to do three things before the first session on Thursday 8th February at 09:00: - -### 1. 
Install and test the software - -The course is taught with R and RStudio so you need to have these installed on your computer before the first practical session. -Follow the instructions in the book Reproducible Road Safety Research with R, which you can find here: https://itsleeds.github.io/rrsrr/introduction.html#installing-r-and-rstudio - -If you learn better by watching videos, you can watch this video: https://www.youtube.com/watch?v -=YrEe2TLr3MI - -#### Can I use University of Leeds computers? - -The University of Leeds has RStudio on computers installed. -However, we strongly recommend that you install R and RStudio on your own computer so that you can work on the course material outside of the practical sessions. -The RStudio installations on University of Leeds computers have not recently been tested and in the past have caused problems for students. - -#### Can I use a different IDE? - -You can use a different IDE (Integrated Development Environment) to RStudio if you prefer (VS Code is the only other IDE I would recommend), but you will need to be able to install R packages and run R scripts. -If you use VS Code, you will need to install the R extension for VS Code: https://marketplace.visualstudio.com/items?i -temName=Ikuyadeu.r I recommend taking a read of this guide if you want to try out VS Code for data science: https://github.com/RamiKrispin/vscode-r You can also use Python, but you will need to be able to install Python packages and run Python scripts. -We strongly recommend using Quarto with VS Code for R, Python and other languages, see here for more details: https://quarto.org/docs/tools/vscode.html - -### 2. Read and try to complete the exercises in Chapters 1 to 4 of the book Reproducible Road Safety Research with R - -The second thing you should do is ensure that you have the necessary software installed on your computer and that you have tested that you can use it for the datasets we will be using in the course. 
-**You must read and try to complete the exercises in Chapters 1 to 4 of the book Reproducible Road Safety Research with R.** Most important section of those 4 chapters is Section 1.5, which explains how to install R and RStudio. -This is not a theoretical section: I suggest you follow the instructions here ASAP: https://itsleeds.github.io/rrsrr/introduction.html#installing-r-and-rstudio - -Other key chapters in that resources are: - -- Chapter 2 on the basics of the statistical programming language R: https://itsleeds.github.io/rrsrr/basics.html -- Chapter 3 on using RStudio: https://itsleeds.github.io/rrsrr/rstudio.html -- Chapter 4 on installing R packages, **make sure you can install packages such as sf**: https://itsleeds.github.io/rrsrr/pkgs.html - -### 3. Get set-up on GitHub and Microsoft Teams - -Teams and GitHub will be the main platforms we use for communication and collaboration. -Ensure you can access them as soon as possible. - -- Sign-up to get a GitHub account here: https://github.com/ -- Ensure you can access the Transport Data Science Team on Microsoft Teams and please do say hello via this link: https://teams.microsoft.com/l/channel/19%3a-IqQ19KOCzVzkbPWnQy8d4dUq0MspiN6Dv4BJYMr-ck1%40thread.tacv2/General?groupId=0e661005-2ad9-4aa2-a466-b3a1afa728c1&tenantId=bdeaeda8-c81d-45ce-863e-5232a535b7cb -- See the reading list associated with the module at https://github.com/ITSLeeds/TDS/blob/master/catalogue.md#reading-list. -- Ensure you can access the module page on Minerva: https://minerva.leeds.ac.uk/ultra/courses/_551386_1/outline -- Answer the questionnaire on the Minerva module page (allow a few hours to do this because it asks you to complete some tasks while you answer the questions): **without completing this we cannot add you to the course on GitHub.** - -## Contact - -If you have any questions, please do not hesitate to contact me via email. -My office hours are 10:00 to 12:00 on Mondays. 
- -I really look forward to working with you all to make this a successful and enjoyable module. - -## Questionnaire - -Please complete the questionnaire on the Minerva module page: **without completing this we cannot add you to the course on GitHub.** -This questionnaire assumes you have read and acted on the information in this welcome message: https://github.com/ITSLeeds/TDS/blob/master/messages/welcome.md - -1. Have you installed R and RStudio on your computer, following the instructions in the welcome message? - -2. Have you read and tried to complete the exercises in Chapters 1 to 4 of the book Reproducible Road Safety Research with R? - -3. Have you signed up to GitHub? - -4. What is your GitHub username? - -5. Have you joined the Transport Data Science Team on Microsoft Teams? - -6. Have you got experience with R? - -7. Have you got experience with Python? - -8. Have you got experience with geographic data? - -9. What would you most like to get out of this course? - -# References \ No newline at end of file diff --git a/p1/index.qmd b/p1/index.qmd new file mode 100644 index 0000000..f8d4181 --- /dev/null +++ b/p1/index.qmd @@ -0,0 +1,257 @@ +--- +title: "Practical 1: Introduction to Transport Data Science" +bibliography: ../tds.bib +toc: true +execute: + cache: true +--- + +## Agenda {.unnumbered} + +1. Lecture: an introduction to Transport Data Science (30 min) +2. Q&A (15 min) + +3. Break and networking (15 min) + +4. Data science and a good research question (30 min) +5. Data science foundations (guided): Project set-up and using RStudio or VS Code as an integrated development environment (30 min) +6. 
Focussed work (1 hr) + - Working through the questions on processing OD data and running the code in Sections 13.1 to 13.4 the Transport chapter of Geocomputation with R and answering the questions for the Bristol dataset + + +# What is transport data science and thinking of a good research question {.unnumbered} + +- Based on the contents of the lecture, come up with *your own* definition of data science +- How do you see yourself using data science over the next 5 years? +- Think of a question about a transport system you know well and how data science could help answer it, perhaps with reference to a sketch like that below + +## How to come up with a good research question {.unnumbered} + +- Think about the data you have access to +- Think about the problems you want to solve +- Think about the methods you want to use and skills you want to learn +- Think about how the final report will look and hold-together + +#### How much potential is there for cycling across the transport network? {.unnumbered} + +![](https://user-images.githubusercontent.com/1825120/127524923-7d9f5511-84a6-430b-8de9-a603a5524f39.png) + +#### How can travel to schools be made safer? {.unnumbered} + +#### How can hospitals encourage visitors to get there safely? {.unnumbered} + +#### Where's the best place to build electric car charging points? 
{.unnumbered} + +See [openstreetmap.org](https://www.openstreetmap.org/#map=19/53.80689/-1.55637) or search for other open access datasets for more ideas + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +```{r schematic, echo=FALSE} +# knitr::include_graphics("https://raw.githubusercontent.com/npct/pct-team/master/flow-model/flow-diag2.png") +``` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +```{r, echo=FALSE} +# Identify available datasets and access and clean them +# Combine datasets from multiple sources +# Understand what machine learning is, which problems it is appropriate for compared with traditional statistical approaches, and how to implement machine learning techniques +# Visualise and communicate the results of transport data science, and know about setting-up interactive web applications +# Deciding when to use local computing power vs cloud services +``` + + + +# Data Science foundations + +**Read and try to complete the exercises in Chapters 1 to 4 of the book Reproducible Road Safety Research with R.** It assumes that you have recently updated R and RStudio on your computer. 
+For details on installing packages see here: https://docs.ropensci.org/stats19/articles/stats19-training-setup.html + +- Create a new folder (or R project with RStudio) called 'practical1' +- In it create file called foundations.qmd +- Type the following + +![](images/paste-1.png) + +- Knit the document by pressing Ctrl+Shift+K, with the 'Knit' button in RStudio, or by typing `quarto render foundations.qmd` in the PowerShell or Terminal console, the result should look like this: + +This is some text: + +```{r} +casualty_type = c("pedestrian", "cyclist", "cat") +casualty_age = seq(from = 20, to = 60, by = 20) +crashes = data.frame(casualty_type, casualty_age) +``` + +- + +```{r, echo=FALSE} +casualty_type = c("pedestrian", "cyclist", "cat") +casualty_age = seq(from = 20, to = 60, by = 20) +crashes = data.frame(casualty_type, casualty_age) +vehicle_type = c("car", "bus", "tank") +crashes$vehicle_type = vehicle_type +``` + +We now have a data frame object stored in memory (technically in the global environment) that is used as the basis of the questions. + +To get some larger datasets, try the following (from Chapter 8 of RSRR) + +::: {.panel-tabset group="language"} +## R + +```{r} +library(stats19) +ac = get_stats19(year = 2019, type = "collision") +ca = get_stats19(year = 2019, type = "cas") +ve = get_stats19(year = 2019, type = "veh") +``` + +## Python + +``` python +# pip install stats19 +import stats19 +ac = stats19.get_stats19(year = 2019, type = "collision") +ca = stats19.get_stats19(year = 2019, type = "cas") +ve = stats19.get_stats19(year = 2019, type = "veh") +``` +::: + +2.3.1. +Use the `$` operator to print the `vehicle_type` column of `crashes`. + +``` +- In R the `$` symbol is used to refer to elemements of a list. So the answer is simply `crashes$vehicle_type` +``` + +2.3.2. +Subsetting the crashes with the `[,]` syntax + +``` +- Try out different combinations on the dataframe +``` + +2.3.3. 
+**Bonus**: what is the `class()` of the objects created by each of the previous exercises? + +``` +- Explore how many R classes you can find +``` + +Let's go through these exercises together: + +1. Subset the `casualty_age` object using the inequality (`<`) so that only elements less than 50 are returned. +2. Subset the `crashes` data frame so that only tanks are returned using the `==` operator. +3. **Bonus**: assign the age of all tanks to 61. + +- Try running the subsetting code on a larger dataset, e.g. the `ac` object created previously + +1. Coerce the `vehicle_type` column of `crashes` to the class `character`. +2. Coerce the `crashes` object into a matrix. What happened to the values? +3. **Bonus:** What is the difference between the output of `summary()` on `character` and `factor` variables? + +- We'll explore this together + + + + + +# Data Science foundations + +Work through [Chapter 13](https://r.geocompx.org/transport.html) of the book Geocomputation with R, taking care to ask questions about any aspects that you don't understand (your homework will be to complete and make notes on the chapter, including reproducible code). + +# Homework + +- Complete working through Chapter 13 of the Geocomputation with R book. Make notes in a .qmd file that you can bring to the class to show colleagues and the instructor next week. + + diff --git a/p2/index.qmd b/p2/index.qmd new file mode 100644 index 0000000..cad0b19 --- /dev/null +++ b/p2/index.qmd @@ -0,0 +1,742 @@ +--- +title: "Origin-destination data" +# subtitle: '
Practical' +# author: "Malcolm Morgan and Robin Lovelace" +# date: 'University of Leeds `r # Sys.Date()()`
' +toc: true +execute: + cache: true + #eval: true + warning: false + message: false +bibliography: ../tds.bib +#jupyter: python3 +engine: knitr +--- + +# Review Homework + +You should now be familiar with the basics of R and the `tidyverse`. If you have not completed these tasks go back and do them first: + +- Read Chapters 2, 3, and 4 of [Reproducible road safety research with R](https://itsleeds.github.io/rrsrr/basics.html) +- Read Chapters 3 and 5 of [R for Data Science](https://r4ds.had.co.nz/data-visualisation.html) + +# Getting started with GIS in R + +Note that this practical takes sections from Chapters 2 - 8 of [Geocomputation with R](https://r.geocompx.org). You should expand your knowledge by reading these chapters in full. + + +## Pre-requisites {-} + +You need to have a number of packages installed and loaded. +Install the packages by typing in the following commands into RStudio (you do not need to add the comments after the `#` symbol) + +If you need to install any of these packages use: + +::: {.panel-tabset} + +## R + +```{r} +#| eval: false +if (!require("pak")) install.packages("pak") +pak::pkg_install(c("sf", "tidyverse", "remotes")) +# GitHub pkgs +pak::pkg_install("Nowosad/spDataLarge") +``` + +``` {r} +library(sf) # vector data package +library(tidyverse) # tidyverse packages +library(spData) # spatial data package +``` + +## Python + +``` {python} +#| eval: false +# Install necessary packages (uncomment if not already installed) +# !pip install geopandas pandas matplotlib seaborn + +import geopandas as gpd # vector data package +import pandas as pd # data manipulation +import matplotlib.pyplot as plt # plotting +import seaborn as sns # advanced plotting +# For spatial data, geopandas comes with sample datasets +# Alternatively, we can use the naturalearth datasets +import geopandas.datasets +``` + +::: + +1. Check your packages are up-to-date with `update.packages()` in R (or equivalent in Python) +1. 
Create a project folder with an appropriate name for this session (e.g. `practical2`) +1. Create appropriate folders for code, data and anything else (e.g. images) +1. Create a script called `learning-OD.R`, e.g. with the following command: + +```sh +mkdir code +code code/learning-OD.R # for R +code code/learning-OD.py # for Python +``` + + + +## Basic sf operations + +We will start with a simple map of the world. Load the `world` object from the `spData` package. Notice the use of `::` to say that you want the `world` object from the `spData` package. + +::: {.panel-tabset} +## R +```{r} +#| echo: true +#| output: false +world = spData::world +``` + +## Python +```{python} +#| eval: false +world = gpd.read_file( + 'https://naturalearth.s3.amazonaws.com/110m_cultural/ne_110m_admin_0_countries.zip' +) +``` +::: + +Use some basic R functions to explore the `world` object. e.g. `class(world)`, `dim(world)`, `head(world)`, `summary(world)`. Also view the `world` object by clicking on it in the Environment panel. + +`sf` objects can be plotted with `plot()`. + + +::: {.panel-tabset} +## R +```{r} +#| warning: false +plot(world) +``` + +## Python +```{python} +#| eval: false +print(type(world)) # Equivalent to class(world) +print(world.shape) # Equivalent to dim(world) +print(world.head()) # Equivalent to head(world) +print(world.describe()) # Equivalent to summary(world) + +# Plotting the world GeoDataFrame +world.plot(figsize=(12, 8)) +plt.title('World Map') +plt.show() +``` +::: +Note that this makes a map of each column in the data frame. 
Try some other plotting options + +::: {.panel-tabset} +## R +```{r} +plot(world[3:6]) +plot(world["pop"]) +``` + +## Python +```{python} +#| eval: false +# Since world is a GeoDataFrame, we can select columns by position +# However, GeoPandas plots the geometry, so we need to specify columns +fig, axes = plt.subplots(1, 3, figsize=(15, 5)) +world.plot(column='POP_EST', ax=axes[0]) +world.plot(column='GDP_YEAR', ax=axes[1]) +world.plot(column='CONTINENT', ax=axes[2]) +plt.show() +``` +::: + +## Basic spatial operations + +Load the `nz` and `nz_height` datasets from the `spData` package. + + + +::: {.panel-tabset} +## R +```{r} +#| echo: true +#| output: false +nz = spData::nz +nz_height = spData::nz_height +``` +## Python +```{python} +#| eval: false +nz = gpd.read_file("https://github.com/Nowosad/spData_files/raw/refs/heads/main/data/nz.gpkg") +nz_height = gpd.read_file("https://github.com/Nowosad/spData_files/raw/refs/heads/main/data/nz_height.gpkg") +``` +::: + +We can use `tidyverse` functions like `filter` and `select` on `sf` objects in the same way you did in Practical 1. + + +::: {.panel-tabset} +## R +```{r} +#| echo: true +#| output: false +canterbury = nz |> filter(Name == "Canterbury") +canterbury_height = nz_height[canterbury, ] +``` + +## Python +```{python} +#| eval: false +canterbury = nz[nz['Name'] == 'Canterbury'] +``` +::: +In this case we filtered the `nz` object to only include places called `Canterbury` and then did and intersection to find objects in the `nz_height` object that are in Canterbury. + +This syntax is not very clear. But is the equivalent to + +::: {.panel-tabset} +## R +```{r} +#| echo: true +#| eval: false +canterbury_height = nz_height[canterbury, , op = st_intersects] +``` + +## Python +```{python} +#| eval: false +canterbury_height = gpd.overlay(nz_height, canterbury, how='intersection') +``` +::: + +There are many different types of relationships you can use with `op`. Try `?st_intersects()` to see more. 
For example this would give all the places not in Canterbury + +::: {.panel-tabset} +## R +```{r} +#| eval: false +nz_height[canterbury, , op = st_disjoint] +``` + +## Python +```{python} +#| eval: false +nz_height_disjoint = nz_height[~nz_height.intersects(canterbury.unary_union)] +``` +::: + + +![Topological relations between vector geometries, inspired by Figures 1 and 2 in Egenhofer and Herring (1990). The relations for which the function(x, y) is true are printed for each geometry pair, with x represented in pink and y represented in blue. The nature of the spatial relationship for each pair is described by the Dimensionally Extended 9-Intersection Model string. ](https://r.geocompx.org/figures/relations-1.png) + + +# Getting started with OD data + +In this section we will look at basic transport data in the R package **stplanr**. + +Load the `stplanr` package as follows: + + +```{r} +#| echo: true +#| output: false +library(stplanr) +``` + +The `stplanr` package contains some data that we can use to demonstrate principles in Data Science, illustrated in the Figure below. Source: Chapter 1 of R for Data Science [@grolemund_r_2016] [available online](https://r4ds.had.co.nz/introduction.html).
+ +![](https://d33wubrfki0l68.cloudfront.net/571b056757d68e6df81a3e3853f54d3c76ad6efc/32d37/diagrams/data-science.png) + +First we will load some sample data: + +::: {.panel-tabset} +## R +```{r} +#| echo: true +od_data = stplanr::od_data_sample +zone = stplanr::cents_sf +``` + +## Python +```{python} +#| eval: false +import pandas as pd +od_data = pd.read_csv('https://github.com/ropensci/stplanr/releases/download/v1.2.2/od_data_sample.csv') +``` +::: + +You can click on the data in the environment panel to view it or use `head(od_data)` +Now we will rename one of the columns from `foot` to `walk` + +::: {.panel-tabset} +## R +```{r} +#| echo: true +od_data = od_data |> + rename(walk = foot) +``` + +## Python +```{python} +#| eval: false +od_data.rename(columns={'foot': 'walk'}, inplace=True) +``` +::: + +Next we will made a new dataset `od_data_walk` by taking `od_data` and piping it (`|>`) to `filter` the data frame to only include rows where `walk > 0`. Then `select` a few of the columns and calculate two new columns `proportion_walk` and `proportion_drive`. 
+ +::: {.panel-tabset} + +## R +```{r} +#| echo: true +od_data_walk = od_data |> + filter(walk > 0) |> + select(geo_code1, geo_code2, all, car_driver, walk) |> + mutate(proportion_walk = walk / all, proportion_drive = car_driver / all) +``` + +## Python +```{python} +#| eval: false +od_data_walk = od_data[od_data['walk'] > 0].copy() +od_data_walk = od_data_walk[['geo_code1', 'geo_code2', 'all', 'car_driver', 'walk']] +od_data_walk['proportion_walk'] = od_data_walk['walk'] / od_data_walk['all'] +od_data_walk['proportion_drive'] = od_data_walk['car_driver'] / od_data_walk['all'] +``` +::: +We can use the generic `plot` function to view the relationships between variables + +::: {.panel-tabset} +## R +```{r} +plot(od_data_walk) +``` + +## Python +```{python} +#| eval: false +sns.pairplot(od_data_walk) +plt.show() +``` +::: +R has built in modelling functions such as `lm` lets make a simple model to predict the proportion of people who walk based on the proportion of people who drive. + + +::: {.panel-tabset} +## R +```{r} +#| echo: true +model1 = lm(proportion_walk ~ proportion_drive, data = od_data_walk) +od_data_walk$proportion_walk_predicted = model1$fitted.values +``` + +## Python +```{python} +#| eval: false +# pip install statsmodels +import statsmodels.formula.api as smf + +model1 = smf.ols('proportion_walk ~ proportion_drive', data=od_data_walk).fit() +od_data_walk['proportion_walk_predicted'] = model1.fittedvalues +``` +::: + +We can use the `ggplot2` package to graph our model predictions. 
+ +::: {.panel-tabset} +## R +```{r} +ggplot(od_data_walk) + + geom_point(aes(proportion_drive, proportion_walk)) + + geom_line(aes(proportion_drive, proportion_walk_predicted)) +``` + +## Python +```{python} +#| eval: false +plt.figure(figsize=(8, 6)) +plt.scatter(od_data_walk['proportion_drive'], od_data_walk['proportion_walk'], label='Observed') +plt.plot(od_data_walk['proportion_drive'], od_data_walk['proportion_walk_predicted'], color='red', label='Predicted') +plt.xlabel('Proportion Drive') +plt.ylabel('Proportion Walk') +plt.legend() +plt.show() +``` +::: + +Exercises + + +1. What is the class of the data in `od_data`? +2. Subset (filter) the data to only include OD pairs in which at least one person (`> 0`) person walks (bonus: on what % of the OD pairs does at least 1 person walk?) +3. Calculate the percentage who cycle in each OD pair in which at least 1 person cycles +4. Is there a positive relationship between walking and cycling in the data? +5. Bonus: use the function `od2line()` in to convert the OD dataset into geographic desire lines + + +Codes for Exercises + +:::: {.panel-tabset} +## Exercise 1 +::: {.panel-tabset} +### R +```{r} +#| eval: false +class(od_data) +``` +### Python +```{python} +#| eval: false +print("Class of od_data:", type(od_data)) +``` +::: +## Exercise 2 +::: {.panel-tabset} +### R +```{r} +#| eval: false + +od_data_walk = od_data |> + filter(walk > 0) +nrow(od_data_walk) / nrow(od_data) * 100 +``` +### Python +```{python} +#| eval: false +od_data_walk = od_data[od_data['walk'] > 0].copy() +percentage_walk = (len(od_data_walk) / len(od_data)) * 100 +print(f"Percentage of OD pairs where at least one person walks: {percentage_walk}%") +``` +::: +## Exercise 3 +::: {.panel-tabset} +### R +```{r} +#| eval: false +#| +od_data = od_data |> + filter(bicycle > 0) |> + mutate(perc_cycle = (bicycle/all) * 100) +``` + +### Python +```{python, eval=FALSE} +#| eval: false +od_data_cycle = od_data[od_data['bicycle'] > 0].copy() 
+od_data_cycle['perc_cycle'] = (od_data_cycle['bicycle'] / od_data_cycle['all']) * 100 +``` +::: + +## Exercise 4 +::: {.panel-tabset} +### R +```{r} +#| eval: false + +od_data_new = od_data |> + filter(walk > 0, bicycle>0 ) |> + select(bicycle, walk, all) + +model = lm(walk ~ bicycle, weights = all, data = od_data_new) +od_data_new$walk_predicted = model$fitted.values + +ggplot(od_data_new) + + geom_point(aes(bicycle, walk, size = all)) + + geom_line(aes(bicycle, walk_predicted)) +``` + +### Python +```{python} +#| eval: false +od_data_new = od_data[(od_data['walk'] > 0) & (od_data['bicycle'] > 0)].copy() +od_data_new = od_data_new[['bicycle', 'walk', 'all']] + +# Weighted linear regression +import statsmodels.api as sm + +weights = od_data_new['all'] +X = sm.add_constant(od_data_new['bicycle']) +wls_model = sm.WLS(od_data_new['walk'], X, weights=weights) +results = wls_model.fit() +od_data_new['walk_predicted'] = results.fittedvalues + +# Plotting the relationship +plt.figure(figsize=(8, 6)) +plt.scatter(od_data_new['bicycle'], od_data_new['walk'], s=od_data_new['all']*0.1, label='Data') +plt.plot(od_data_new['bicycle'], od_data_new['walk_predicted'], color='red', label='Fitted Line') +plt.xlabel('Bicycle') +plt.ylabel('Walk') +plt.legend() +plt.show() +``` +::: + +## Exercise 5 + +::: {.panel-tabset} +### R +```{r} +#| eval: false + +desire_lines = stplanr::od2line(flow = od_data, zones = zone) +plot(desire_lines) +#save zone as gpkg +sf::st_write(zone, "zone.geojson") +``` + +### Python +```{python} +#| eval: false +import pandas as pd +import geopandas as gpd +from shapely.geometry import LineString + +od_data = pd.read_csv('https://github.com/ropensci/stplanr/releases/download/v1.2.2/od_data_sample.csv') + +zones = gpd.read_file('https://github.com/ropensci/stplanr/releases/download/v1.2.2/zones.geojson') + +# Ensure the CRS is set (replace 'epsg:4326' with your actual CRS if different) +if zones.crs is None: + zones.set_crs(epsg=4326, inplace=True) + +# If 
zones are polygons, compute centroids +if zones.geom_type.isin(['Polygon', 'MultiPolygon']).any(): + print("Creating centroids representing desire line start and end points.") + zones['geometry'] = zones.centroid + +# Create a mapping from 'geo_cod' to 'geometry' +geo_cod_to_geometry = dict(zip(zones['geo_cod'], zones['geometry'])) + +# Map origin and destination geometries +od_data['geometry_o'] = od_data['geo_code1'].map(geo_cod_to_geometry) +od_data['geometry_d'] = od_data['geo_code2'].map(geo_cod_to_geometry) + +# Check for any missing matches +missing_origins = od_data[od_data['geometry_o'].isnull()] +missing_destinations = od_data[od_data['geometry_d'].isnull()] + +if not missing_origins.empty: + print(f"Missing origin geometries for {len(missing_origins)} records") +if not missing_destinations.empty: + print(f"Missing destination geometries for {len(missing_destinations)} records") + +# Remove rows with missing geometries +od_data.dropna(subset=['geometry_o', 'geometry_d'], inplace=True) + +# Create LineString geometries for desire lines +od_data['geometry'] = od_data.apply( + lambda row: LineString([row['geometry_o'], row['geometry_d']]), axis=1 +) + +# Create a GeoDataFrame for the desire lines +desire_lines = gpd.GeoDataFrame(od_data, geometry='geometry', crs=zones.crs) + +# Plot the desire lines +desire_lines.plot() +``` +::: +:::: + +# Processing origin-destination data in Bristol + +This section is based on [Chapter 12 of Geocomputation with R](https://geocompr.robinlovelace.net/transport.html). You should read this chapter in full in your own time. + +We need the `stplanr` package which provides many useful functions for transport analysis and `tmap` package which enables advanced mapping features. 
+ + +```{r} +#| echo: true +#| output: false +#| eval: true +library(stplanr) +library(tmap) +``` + + +We will start by loading two datasets: + +::: {.panel-tabset} +## R +```{r} +od = spDataLarge::bristol_od +zones = spDataLarge::bristol_zones +``` + +## Python +```{python} +#| eval: false +od_data = gpd.read_file('https://github.com/ropensci/stplanr/releases/download/v1.2.2/bristol_od.geojson') + +zones = gpd.read_file('https://github.com/ropensci/stplanr/releases/download/v1.2.2/bristol_zones.geojson') + +if zones.crs is None: + zones.set_crs(epsg=4326, inplace=True) + +# If zones are polygons, compute centroids +if zones.geom_type.isin(['Polygon', 'MultiPolygon']).any(): + print("Creating centroids representing desire line start and end points.") + zones['geometry'] = zones.centroid + +# Create a mapping from 'geo_cod' to 'geometry' +geo_cod_to_geometry = dict(zip(zones['geo_code'], zones['geometry'])) + +# Map origin and destination geometries +od_data['geometry_o'] = od_data['geo_code1'].map(geo_cod_to_geometry) +od_data['geometry_d'] = od_data['geo_code2'].map(geo_cod_to_geometry) + +# Check for any missing matches +missing_origins = od_data[od_data['geometry_o'].isnull()] +missing_destinations = od_data[od_data['geometry_d'].isnull()] + +if not missing_origins.empty: + print(f"Missing origin geometries for {len(missing_origins)} records") +if not missing_destinations.empty: + print(f"Missing destination geometries for {len(missing_destinations)} records") + +# Remove rows with missing geometries +od_data.dropna(subset=['geometry_o', 'geometry_d'], inplace=True) + +# Create LineString geometries for desire lines +od_data['geometry'] = od_data.apply( + lambda row: LineString([row['geometry_o'], row['geometry_d']]), axis=1 +) + +# Create a GeoDataFrame for the desire lines +desire_lines = gpd.GeoDataFrame(od_data, geometry='geometry', crs=zones.crs) + +# Plot the desire lines +desire_lines.plot() +``` +::: +Explore these datasets using the functions you have 
already learnt (e.g. `head`,`nrow`). + +You will notice that the `od` datasets has shared id values with the `zones` dataset. We can use these to make desire lines between each zone. But first we must filter out trips that start and end in the same zone. + +::: {.panel-tabset} +## R +```{r, echo = T, results = 'hide', warning=FALSE, message=FALSE} +od_inter = filter(od, o != d) +desire_lines = od2line(od_inter, zones) +``` +## Python +```{python} +#| eval: false +# Filter OD data where origin and destination are different +od_inter = od[od['o'] != od['d']].copy() + +od_inter = od_inter.merge(zones[['geo_code', 'geometry']], left_on='o', right_on='geo_code', how='left') +od_inter.rename(columns={'geometry': 'origin_geometry'}, inplace=True) +od_inter = od_inter.merge(zones[['geo_code', 'geometry']], left_on='d', right_on='geo_code', how='left') +od_inter.rename(columns={'geometry': 'destination_geometry'}, inplace=True) + +``` +::: +Let's calculate the percentage of trips that are made by active travel + +::: {.panel-tabset} +## R +```{r, echo = T, results = 'hide'} +desire_lines$Active = (desire_lines$bicycle + desire_lines$foot) / + desire_lines$all * 100 +``` +## Python +```{python} +#| eval: false +desire_lines['Active'] = (desire_lines['bicycle'] + desire_lines['foot']) / desire_lines['all'] * 100 +``` +::: +Now use `tmap` to make a plot showing the number of trips and the percentage of people using active travel. 
+ +::: {.panel-tabset} +## R +```{r} +#| echo: true +#| output: true +desire_lines = desire_lines[order(desire_lines$Active),] + +tm_shape(desire_lines) + # Define the data frame used to make the map + tm_lines(col = "Active", # We want to map lines, the colour (col) is based on the "Active" column + palette = "plasma", # Select a colour palette + alpha = 0.7, # Make lines slightly transparent + lwd = "all") + # The line width (lwd) is based on the "all" column + tm_layout(legend.outside = TRUE) + # Move the legend outside the map + tm_scale_bar() # Add a scale bar to the map +``` + +## Python +```{python} +#| eval: false +desire_lines = desire_lines.sort_values('Active') + +# Normalize line widths for plotting +max_trips = desire_lines['all'].max() +desire_lines['linewidth'] = (desire_lines['all'] / max_trips) * 5 + +# Plotting desire lines with active travel percentage +fig, ax = plt.subplots(figsize=(12, 10)) +desire_lines.plot( + ax=ax, + column='Active', + cmap='plasma', + linewidth=desire_lines['linewidth'], + alpha=0.7, + legend=True +) +plt.title('Desire Lines with Active Travel Percentage') + +# Add basemap (optional) +# ctx.add_basemap(ax, crs=desire_lines.crs.to_string()) + +plt.show() +``` +::: +Now that we have geometry attached to our data we can calculate other variables of interest. For example let's calculate the distance travelled and see if it relates to the percentage of people who use active travel. + + +::: {.panel-tabset} +## R +```{r} +desire_lines$distance_direct_m = as.numeric(st_length(desire_lines)) +``` +## Python +```{python} +#| eval: false +desire_lines['distance_direct_m'] = desire_lines.geometry.length +``` +::: +Note the use of `as.numeric`: by default `st_length` and many other functions return a special type of result with `unit`. Here we force the results back into the basic R numerical value. But be careful! The units you get back depend on the coordinate reference system, so check your data before you assume what values mean.
+
+::: {.panel-tabset}
+## R
+```{r}
+ggplot(desire_lines) +
+  geom_point(aes(x = distance_direct_m, y = Active, size = all)) +
+  geom_smooth(aes(x = distance_direct_m, y = Active))
+```
+## Python
+```{python}
+#| eval: false
+plt.figure(figsize=(8, 6))
+sns.scatterplot(data=desire_lines, x='distance_direct_m', y='Active', size='all', legend=False)
+sns.regplot(data=desire_lines, x='distance_direct_m', y='Active', scatter=False, color='red')
+plt.xlabel('Distance (meters)')
+plt.ylabel('Active Travel Percentage')
+plt.title('Active Travel vs Distance')
+plt.show()
+```
+:::
+The blue line is a smoothed average of the data. It shows a common concept in transport research, the distance decay curve. In this case it shows that the longer the journey the less likely people are to use active travel. But this concept applies to all kinds of travel decisions. For example you are more likely to travel to a nearby coffee shop than a far away coffee shop. Different types of trip have different curves, but most people always have a bias for shorter trips.
+
+
+# Homework
+
+1. Read Chapters 2-5 of [Geocomputation with R](https://r.geocompx.org/transport.html)
+2. Work through Sections 13.1 to 13.4 of the Transport Chapter in [Geocomputation with R](https://r.geocompx.org/transport.html)
+3. Bonus: Read more about using the [tmap package](https://r-tmap.github.io/tmap/)
+4. Bonus: Read more about the [ggplot2 package](https://ggplot2.tidyverse.org/)
+5. 
Bonus: Read Chapter 7 & 8 of [Geocomputation with R](https://r.geocompx.org/transport.html) + + +# References \ No newline at end of file diff --git a/pixi.toml b/pixi.toml new file mode 100644 index 0000000..eaf7a67 --- /dev/null +++ b/pixi.toml @@ -0,0 +1,28 @@ +[project] +channels = ["conda-forge"] +description = "Add a short description here" +name = "TDStests" +platforms = ["win-64"] +version = "0.1.0" + +[tasks] + +[dependencies] +jupyter = "*" +jupyter-cache = "*" +geopandas = "*" +matplotlib = "*" +shapely = "*" +seaborn = "*" +quarto = "*" +r-base = "*" +r-irkernel = "*" +r-tidyverse = "*" +r-sf = "*" +r-quarto = "*" +r-nycflights13 = "*" +r-remotes = "*" +r-DT = "*" +r-reticulate = "*" +r-spData = "*" +r-pak = "*" \ No newline at end of file diff --git a/practicals/1-intro.Rmd b/practicals/1-intro.Rmd deleted file mode 100644 index 73903d9..0000000 --- a/practicals/1-intro.Rmd +++ /dev/null @@ -1,272 +0,0 @@ ---- -title: "Introduction to transport data science" -subtitle: '
Practical' -author: "Robin Lovelace" -date: 'University of Leeds `r # Sys.Date()()`
' -output: - github_document: - number_sections: true -bibliography: ../tds.bib ---- - - - - - - - - - - - - - - -# Thinking about (transport) data science - -- Based on the contents of the lecture, come up with *your own* definition of data science -- How do you see yourself using data science over the next 1 year, 5 years, 20 years -- What do you hope to get out of it personally? -- Bonus: think of a question about a transport system you know well and how data science could help answer it, perhaps with reference to a sketch like that below - -#### How much potential is there for cycling across the transport network? {-} - -![](https://user-images.githubusercontent.com/1825120/127524923-7d9f5511-84a6-430b-8de9-a603a5524f39.png) - -# Questions about homework - -1. Reproduce this script: https://github.com/ITSLeeds/pct/blob/master/inst/test-setup.R - - - - - - - - - - - - - - - - - - - - - - - - - - - -```{r schematic, echo=FALSE} -# knitr::include_graphics("https://raw.githubusercontent.com/npct/pct-team/master/flow-model/flow-diag2.png") -``` - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -```{r, echo=FALSE} -# Identify available datasets and access and clean them -# Combine datasets from multiple sources -# Understand what machine learning is, which problems it is appropriate for compared with traditional statistical approaches, and how to implement machine learning techniques -# Visualise and communicate the results of transport data science, and know about setting-up interactive web applications -# Deciding when to use local computing power vs cloud services -``` - - - - -# Practical introduction to data science - -Before you run the code in this section, ensure that you have recently updated R and RStudio on your computer. 
-Furthermore, you will need to have installed a number of packages, as described here: https://docs.ropensci.org/stats19/articles/stats19-training-setup.html - -## Pre-requisites {-} - -You need to have a number of packages installed and loaded. -Install the packages by typing in the following commands into RStudio (you do not need to add the comments after the `#` symbol):^[ -Note: if you want to install the development version of a package from GitHub, you can do so. -Try, for example, running the following command: `remotes::install_github("ITSLeeds/pct")` -] - -```{r, eval=FALSE} -install.packages("remotes") -pkgs = c( - "nycflights13",# data package - "stats19", # downloads and formats open stats19 crash data - "tidyverse" # a package for user friendly data science -) -remotes::install_cran(pkgs) -remotes::install_github("nowosad/spDataLarge") -``` - -Load the tidyverse package as follows: - -```{r} -library(tidyverse) -``` - -This section will use content from Chapter 5 of the R for Data Science book [@grolemund_data_2016]. 
- -- Read [section 5.1](https://r4ds.had.co.nz/transform.html#filter-rows-with-filter) of R for Data Science and write code that reproduces the results in that section in the script `learning-tidyverse.R` - -Your script will start with something like this: - -```{r} -library(tidyverse) -library(nycflights13) -``` - -- Take a random sample of 10,000 flights and assign it to an object with the following line of code: - -```{r} -library(nycflights13) -flights_sample = sample_n(flights, 1e4) -unique(flights$carrier) -``` - -- Find the unique carriers with the `unique()` function - -- Create an object containing flights from United, American, or Delta, and assign it to `f`, as follows: - -```{r} -f = filter(flights, grepl(pattern = "UA|AA|DL", x = carrier)) -f2 = filter(flights, grepl(pattern = "UA", x = carrier) | - grepl(pattern = "AA", x = carrier) | - grepl(pattern = "DL", x = carrier) - ) -f3 = filter(flights, str_detect(carrier, "UA|AA|DL")) -``` - -- Create plots that visualise the sample flights, using code from Chapter 3 of the same book, starting with the following plot: - -```{r, message=FALSE, warning=FALSE} -ggplot(f) + - geom_point(aes(air_time, distance)) -``` - -- Add transparency so it looks like this (hint: use `alpha =` in the `geom_point()` function call): - -```{r, echo=FALSE} -ggplot(f) + - geom_point(aes(air_time, distance), alpha = 0.1) -``` - -- Add a colour for each carrier, so it looks something like this: - -```{r} -ggplot(f) + - geom_point(aes(air_time, distance, colour = carrier), alpha = 0.5) -``` - -- Bonus 1: find the average air time of those flights with a distance of 1000 to 2000 miles - -- Bonus 2: use the `lm()` function to find the relationship between flight distance and time, and plot the results (start the plot as follows, why did we use `na.omit()`? 
hint - find help with `?na.omit()`): - -```{r} -f = na.omit(f) -m = lm(air_time ~ distance, data = f) -f$pred = m$fitted.values -``` - -```{r, echo=FALSE} -ggplot(f) + - geom_point(aes(air_time, distance, colour = carrier), alpha = 0.5) + - geom_line(aes(pred, distance)) -``` - -# Homework - -1) create a reproducible document - -- Create an Rmarkdown file with the following command: - -```r -file.edit("learning-tidyverse.Rmd") -``` - -- Take a read of the guidance on RMarkdown files online and in the following location (or search online for the 'RMarkdown cheatsheet'): - -``` -Help > Cheatsheets > RMarkdown -``` - -- Put the code you generated for `tidyverse.R` into the Rmd file and knit it - -- Bonus: create a GitHub repo and publish the results of of your work (hint: putting `output: github_document` may help here!) - -2) Work-through the remaining exercises of the first sections in R4DS chapters 3 and 5 - - Write and R script, with comments, to show your working (and prove you've done it!) 
- -```{r, include=FALSE} -library(tidyverse) -mpg -ggplot(mpg) + - geom_point(mapping = aes(hwy, cyl, col = drv )) -library(nycflights13) -names(flights) -?flights -# Were delayed by at least an hour, but made up over 30 minutes in flight -# part 1: -delayed_hour = flights %>% - filter(dep_delay > 60) -nrow(delayed_hour) / nrow(flights) -# part 2: calculate length of delay -flight_delays = flights %>% - mutate(delay = dep_delay - arr_delay) -summary(flight_delays$delay) - -# part 3: -result = flight_delays %>% - filter(dep_delay > 60 & delay > 30) -nrow(result) - -summary(is.na(flights$arr_delay)) - -# base R approach -sel_delayed = flights$dep_delay > 60 & - !is.na(flights$dep_delay) -sel_arrive = flights$arr_delay < 30 & - !is.na(flights$arr_delay) -class(sel_arrive) -sel_combined = sel_arrive & sel_delayed -sum(sel_combined) -result2 = flights[sel_combined, ] -nrow(result2) -``` - - -3) Create an RMarkdown file containing reproducible code outlining what you learned today diff --git a/practicals/1-intro.md b/practicals/1-intro.md deleted file mode 100644 index 003d9f6..0000000 --- a/practicals/1-intro.md +++ /dev/null @@ -1,249 +0,0 @@ -Introduction to transport data science -================ -Robin Lovelace -University of Leeds -
- -Note: before you run this tutorial, ensure that you have recently -updated R and RStudio on your computer. Furthermore, you will need to -have installed a number of packages, as described here: - - - - - - - - - - - - - - -# 1 Thinking about (transport) data science - -- Based on the contents of the lecture, come up with *your own* - definition of data science -- How do you see yourself using data science over the next 1 year, 5 - years, 20 years -- What do you hope to get out of it personally? -- Bonus: think of a question about a transport system you know well and - how data science could help answer it, perhaps with reference to a - sketch like that below - -#### How much potential is there for cycling across the transport network? - -![](https://user-images.githubusercontent.com/1825120/127524923-7d9f5511-84a6-430b-8de9-a603a5524f39.png) - -# 2 Questions about homework - -1. Reproduce this script: - -2. Work through the transport chapter of Geocomputation with R: - - -# 3 Practical 2 - -See - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -# 4 Practical introduction to data science - -## Pre-requisites - -You need to have a number of packages installed and loaded. 
Install the -packages by typing in the following commands into RStudio (you do not -need to add the comments after the `#` symbol):[^1] - -``` r -install.packages("remotes") -pkgs = c( - "nycflights13",# data package - "stats19", # downloads and formats open stats19 crash data - "tidyverse" # a package for user friendly data science -) -remotes::install_cran(pkgs) -remotes::install_github("nowosad/spDataLarge") -``` - -Load the tidyverse package as follows: - -``` r -library(tidyverse) -``` - - ## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ── - ## ✔ dplyr 1.1.4 ✔ readr 2.1.5 - ## ✔ forcats 1.0.0 ✔ stringr 1.5.1 - ## ✔ ggplot2 3.4.4.9000 ✔ tibble 3.2.1 - ## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1 - ## ✔ purrr 1.0.2 - ## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ── - ## ✖ dplyr::filter() masks stats::filter() - ## ✖ dplyr::lag() masks stats::lag() - ## ℹ Use the conflicted package () to force all conflicts to become errors - -This section will use content from Chapter 5 of the R for Data Science -book (**grolemund_data_2016?**). 
- -- Read [section - 5.1](https://r4ds.had.co.nz/transform.html#filter-rows-with-filter) of - R for Data Science and write code that reproduces the results in that - section in the script `learning-tidyverse.R` - -Your script will start with something like this: - -``` r -library(tidyverse) -library(nycflights13) -``` - -- Take a random sample of 10,000 flights and assign it to an object with - the following line of code: - -``` r -library(nycflights13) -flights_sample = sample_n(flights, 1e4) -unique(flights$carrier) -``` - - ## [1] "UA" "AA" "B6" "DL" "EV" "MQ" "US" "WN" "VX" "FL" "AS" "9E" "F9" "HA" "YV" - ## [16] "OO" - -- Find the unique carriers with the `unique()` function - -- Create an object containing flights from United, American, or Delta, - and assign it to `f`, as follows: - -``` r -f = filter(flights, grepl(pattern = "UA|AA|DL", x = carrier)) -f2 = filter(flights, grepl(pattern = "UA", x = carrier) | - grepl(pattern = "AA", x = carrier) | - grepl(pattern = "DL", x = carrier) - ) -f3 = filter(flights, str_detect(carrier, "UA|AA|DL")) -``` - -- Create plots that visualise the sample flights, using code from - Chapter 3 of the same book, starting with the following plot: - -``` r -ggplot(f) + - geom_point(aes(air_time, distance)) -``` - -![](1-intro_files/figure-gfm/unnamed-chunk-7-1.png) - -- Add transparency so it looks like this (hint: use `alpha =` in the - `geom_point()` function call): - - - - ## Warning: Removed 2117 rows containing missing values or values outside the scale range - ## (`geom_point()`). - -![](1-intro_files/figure-gfm/unnamed-chunk-8-1.png) - -- Add a colour for each carrier, so it looks something like this: - -``` r -ggplot(f) + - geom_point(aes(air_time, distance, colour = carrier), alpha = 0.5) -``` - - ## Warning: Removed 2117 rows containing missing values or values outside the scale range - ## (`geom_point()`). 
- -![](1-intro_files/figure-gfm/unnamed-chunk-9-1.png) - -- Bonus 1: find the average air time of those flights with a distance of - 1000 to 2000 miles - -- Bonus 2: use the `lm()` function to find the relationship between - flight distance and time, and plot the results (start the plot as - follows, why did we use `na.omit()`? hint - find help with - `?na.omit()`): - -``` r -f = na.omit(f) -m = lm(air_time ~ distance, data = f) -f$pred = m$fitted.values -``` - -![](1-intro_files/figure-gfm/unnamed-chunk-11-1.png) - -# 5 Homework - -1) create a reproducible document - -- Create an Rmarkdown file with the following command: - -``` r -file.edit("learning-tidyverse.Rmd") -``` - -- Take a read of the guidance on RMarkdown files online and in the - following location (or search online for the ‘RMarkdown cheatsheet’): - - - - Help > Cheatsheets > RMarkdown - -- Put the code you generated for `tidyverse.R` into the Rmd file and - knit it - -- Bonus: create a GitHub repo and publish the results of of your work - (hint: putting `output: github_document` may help here!) - -2) Work-through the remaining exercises of the first sections in R4DS - chapters 3 and 5 - -- Write and R script, with comments, to show your working (and prove - you’ve done it!) - -3) Create an RMarkdown file containing reproducible code outlining what - you learned today - -[^1]: Note: if you want to install the development version of a package - from GitHub, you can do so. Try, for example, running the following - command: `remotes::install_github("ITSLeeds/pct")` diff --git a/practicals/1-intro_files/figure-gfm/unnamed-chunk-13-1.png b/practicals/1-intro_files/figure-gfm/unnamed-chunk-13-1.png deleted file mode 100644 index e69de29..0000000 diff --git a/practicals/2-od.Rmd b/practicals/2-od.Rmd deleted file mode 100644 index 118a38d..0000000 --- a/practicals/2-od.Rmd +++ /dev/null @@ -1,273 +0,0 @@ ---- -title: "Origin-destination data" -subtitle: '
Practical' -author: "Robin Lovelace" -date: 'University of Leeds `r # Sys.Date()()`
' -output: - github_document: - number_sections: true -bibliography: ../tds.bib ---- - - - -## Pre-requisites {-} - -You need to have a number of packages installed and loaded. -Install the packages by typing in the following commands into RStudio (you do not need to add the comments after the `#` symbol):^[ -Note: if you want to install the development version of a package from GitHub, you can do so. -Try, for example, running the following command: `remotes::install_github("ITSLeeds/pct")` -] - -```{r, eval=FALSE} -install.packages("remotes") -pkgs = c( - "nycflights13",# data package - "pct", # package for getting travel data in the UK - "sf", # spatial data package - "stats19", # downloads and formats open stats19 crash data - "stplanr", # for working with origin-destination and route data - "tidyverse", # a package for user friendly data science - "tmap" # for making maps -) -remotes::install_cran(pkgs) -remotes::install_github("nowosad/spDataLarge") -``` - -Load the tidyverse package as follows: - -```{r} -library(tidyverse) -``` - -# Project set-up and tidyverse testing - -1. Check your packages are up1.to-date with `update.packages()` -1. Create an RStudio project with an appropriate name for this module (e.g. `TDSmodule`) -1. Create appropriate files for code, data and anything else (e.g. images) -1. Create a script called `learning-tidyverse.R`, e.g. with the following command: - -```r -dir.create("code") # -file.edit("code/learning-tidyverse.R") -``` - -# Getting started with transport data - -We're going to start by looking at the main types of transport data:^[ -Note: if you want to get zone data for a different region you can do so, e.g. with: -`zones = sf::read_sf("https://github.com/npct/pct-outputs-regional-notR/raw/master/commute/msoa/west-yorkshire/z.geojson")` -] - -In this section we will look at basic transport data in the R package **stplanr**. 
- -Attach the `tidyverse`, `stplanr` and `sf` packages as follows: - -```{r} -library(tidyverse) -library(stplanr) -library(sf) -``` - -The `stplanr` package contains some data that we can use to demonstrate principles in Data Science, illustrated in the Figure below. Source: Chapter 1 of R for Data Science [@grolemund_r_2016] [available online](https://r4ds.had.co.nz/introduction.html). - -![](https://d33wubrfki0l68.cloudfront.net/571b056757d68e6df81a3e3853f54d3c76ad6efc/32d37/diagrams/data-science.png) -```{r} -# import -od_data = stplanr::od_data_sample -``` - -```{r} -# tidy -od_data = od_data %>% - rename(walk = foot) -``` - - -```{r} -# transform -od_data_walk = od_data %>% - filter(walk > 0) %>% - select(geo_code1, geo_code2, all, car_driver, walk) %>% - mutate(proportion_walk = walk / all, proportion_drive = car_driver / all) -``` - -```{r} -# visualise -plot(od_data_walk) -``` - -```{r} -# model -model1 = lm(proportion_walk ~ proportion_drive, data = od_data_walk) -od_data_walk$proportion_walk_predicted = model1$fitted.values -``` - -```{r} -# visualise -ggplot(od_data_walk) + - geom_point(aes(proportion_drive, proportion_walk)) + - geom_line(aes(proportion_drive, proportion_walk_predicted)) -``` - -```{r} -# transform -# ... -``` - -Exercises - -1. What is the class of the data in `od_data`? -1. Subset (filter) the data to only include OD pairs in which at least one person (`> 0`) person walks (bonus: on what % of the OD pairs does at least 1 person walk?) -2. Calculate the percentage who cycle in each OD pair in which at least 1 person cycles -3. Is there a positive relationship between walking and cycling in the data? -4. Plot the zones representing the `geo_code` variables in the OD data -5. 
Bonus: use the function `od2line()` in to convert the OD dataset into geographic desire lines - -```{r, echo=FALSE, eval=FALSE} -#1 -class(od_data) -``` - -```{r, echo=FALSE, eval=FALSE} -#2 -od_data_walk = od_data %>% - filter(walk > 0) -nrow(od_data_walk) / nrow(od_data) -``` - -```{r, echo=FALSE, eval=FALSE} -#3 -od_data = od_data %>% - filter(bicycle > 0) %>% - mutate(perc_cycle = (bicycle/all) * 100) -``` - -```{r, echo=FALSE, eval=FALSE} -#4 -od_data_new = od_data %>% - filter(walk > 0, bicycle>0 ) %>% - select(bicycle, walk, all) - -model = lm(walk ~ bicycle, weights = all, data = od_data_new) -od_data_new$walk_predicted = model$fitted.values - -ggplot(od_data_new) + - geom_point(aes(bicycle, walk, size = all)) + - geom_line(aes(bicycle, walk_predicted)) -``` - -```{r, echo=FALSE, eval=FALSE} -#5 -zones = sf::read_sf("https://github.com/npct/pct-outputs-regional-notR/raw/master/commute/msoa/west-yorkshire/z.geojson") -zones_leeds = zones %>% - filter(lad_name == "Leeds") -plot(zones_leeds$geometry) -``` - -```{r, echo=FALSE, eval=FALSE} -#6 -desire_lines = od2line(flow = od_data, zones) -plot(desire_lines) -``` - -# Processing origin-destination data in Bristol - -This section is based on Chapter 12 of Geocomputation with R: https://geocompr.robinlovelace.net/transport.html - -The task is to reproduce the results shown in that chapter on your own computer. -Some code to get started on a subset of the data is shown below. 
- -Start with a medium-sized dataset: - -```{r} -# import -od = spDataLarge::bristol_od -head(od) -``` - -```{r} -# tidy -zones = spDataLarge::bristol_zones -zones = zones %>% - mutate(local_authority = word(string = name, 1)) -plot(zones %>% select(local_authority), key.pos = 1) -``` - -```{r, eval=FALSE, echo=FALSE} -# Find central data -# bristol_centre = geo_code("bristol") -#> [1] -2.597298 51.453802 -``` - -```{r} -# transform -bristol_sf = tmaptools::geocode_OSM("bristol", as.sf = TRUE, return.first.only = T, geometry = "point") -mapview::mapview(bristol_sf) -bristol_buffer_10km = geo_buffer(bristol_sf, dist = 10000) -zones_central = zones[bristol_buffer_10km, , op = sf::st_within] -# visualise -mapview::mapview(zones_central) -``` - - -```{r} -# transform -od_central = od %>% - filter(o %in% zones_central$geo_code) %>% - filter(d %in% zones_central$geo_code) -nrow(od_central) / nrow(od) -desire_lines = od2line(od_central, zones_central) -desire_lines$distance_direct_m = as.numeric(st_length(desire_lines)) -desire_lines = desire_lines %>% - mutate(proportion_active = (bicycle + foot) / all) -``` - -```{r, fig.show='hold', out.width="40%"} -# visualise -ggplot(desire_lines) + - geom_point(aes(distance_direct_m, proportion_active)) -ggplot(desire_lines) + - geom_point(aes(distance_direct_m, proportion_active, size = all), alpha = 0.3) -``` - -```{r} -# model/visualise -m1 = lm(proportion_active ~ - distance_direct_m + I(distance_direct_m^2), - data = desire_lines) -desire_lines = desire_lines %>% - mutate( - new_active_travel = m1$fitted.values * car_driver, - new_total_active = new_active_travel + bicycle + foot, - new_proportion_active = new_total_active / all - ) %>% - arrange(proportion_active) -ggplot(desire_lines) + - geom_point(aes(distance_direct_m, proportion_active, size = all), alpha = 0.3) + - geom_point(aes(distance_direct_m, new_proportion_active, size = all), alpha = 0.3, colour = "blue") - -``` - -```{r} -# visualise -ggplot(desire_lines) + - 
geom_sf(aes(colour = new_proportion_active, alpha = all)) -``` - -```{r} -library(tmap) -tm_shape(desire_lines) + - tm_lines(palette = "-viridis", breaks = c(0, 5, 10, 20, 40, 100) / 100, - lwd = "all", - scale = 9, - title.lwd = "Number of trips", - alpha = 0.6, - col = c("proportion_active", "new_proportion_active"), - title = "Active travel (%)" - ) + - tm_scale_bar() - -``` - -4) Try mapping OD data for West Yorkshire in preparation for the next practical on routing diff --git a/practicals/2-od.md b/practicals/2-od.md deleted file mode 100644 index e5cc353..0000000 --- a/practicals/2-od.md +++ /dev/null @@ -1,322 +0,0 @@ -Origin-destination data -================ -Malcolm Morgan and Robin Lovelace -University of Leeds -
- -# 1 Review Homework - -You should now be familiar with the basics of R and the `tidyverse`. If -you have not completed these tasks go back and do them first: - -- Read Chapters 2, 3, and 4 of [Reproducible road safety research with - R](https://itsleeds.github.io/rrsrr/basics.html) -- Read Chapters 3 and 5 of [R for Data - Science](https://r4ds.had.co.nz/data-visualisation.html) - -# 2 Getting started with GIS in R - -Note that this practical takes sections from Chapters 2 - 8 of -[Geocomputation with R](https://r.geocompx.org). You should expand your -knowledge by reading these chapters in full. - -## Pre-requisites - -You need to have a number of packages installed and loaded. Install the -packages by typing in the following commands into RStudio (you do not -need to add the comments after the `#` symbol) - -If you need to install any of these packages use: - -``` r -install.packages("sf") # Install a package from CRAN -remotes::install_github("Nowosad/spDataLarge") # install from GitHub using the remotes package -``` - -``` r -library(sf) # vector data package -library(tidyverse) # tidyverse packages -``` - -- It relies on **spData**, which loads datasets used in the code - examples of this chapter: - -``` r -library(spData) # spatial data package -``` - -1. Check your packages are up-to-date with `update.packages()` -2. Create an RStudio project with an appropriate name for this session - (e.g. `practical2`) -3. Create appropriate folders for code, data and anything else - (e.g. images) -4. Create a script called `learning-OD.R`, e.g. with the following - command: - -``` r -dir.create("code") # -file.edit("code/learning-OD.R") -``` - -## 2.1 Basic sf operations - -We will start with a simple map of the world. Load the `world` object -from the `spData` package. Notice the use of `::` to say that you want -the `world` object from the `spData` package. - -``` r -world = spData::world -``` - -Use some basic R functions to explore the `world` object. -e.g. 
`class(world)`, `dim(world)`, `head(world)`, `summary(world)`. Also -view the `world` object by clicking on it in the Environment panel. - -`sf` objects can be plotted with `plot()`. - -``` r -plot(world) -``` - -![](2-od_files/figure-gfm/unnamed-chunk-5-1.png) - -Note that this makes a map of each column in the data frame. Try some -other plotting options - -``` r -plot(world[3:6]) -``` - -![](2-od_files/figure-gfm/unnamed-chunk-6-1.png) - -``` r -plot(world["pop"]) -``` - -![](2-od_files/figure-gfm/unnamed-chunk-6-2.png) - -## 2.2 Basic spatial operations - -Load the `nz` and `nz_height` datasets from the `spData` package. - -``` r -nz = spData::nz -nz_height = spData::nz_height -``` - -We can use `tidyverse` functions like `filter` and `select` on `sf` -objects in the same way you did in Practical 1. - -``` r -canterbury = nz %>% filter(Name == "Canterbury") -canterbury_height = nz_height[canterbury, ] -``` - -In this case we filtered the `nz` object to only include places called -`Canterbury` and then did and intersection to find objects in the -`nz_height` object that are in Canterbury. - -This syntax is not very clear. But is the equivalent to - -``` r -canterbury_height = nz_height[canterbury, , op = st_intersects] -``` - -There are many different types of relationships you can use with `op`. -Try `?st_intersects()` to see more. For example this would give all the -places not in Canterbury - -``` r -nz_height[canterbury, , op = st_disjoint] -``` - -![Topological relations between vector geometries, inspired by Figures 1 -and 2 in Egenhofer and Herring (1990). The relations for which the -function(x, y) is true are printed for each geometry pair, with x -represented in pink and y represented in blue. 
The nature of the spatial -relationship for each pair is described by the Dimensionally Extended -9-Intersection Model -string.](https://r.geocompx.org/figures/relations-1.png) - -# 3 Getting started with OD data - -In this section we will look at basic transport data in the R package -**stplanr**. - -Load the `stplanr` package as follows: - -``` r -library(stplanr) -``` - - ## Warning: package 'stplanr' was built under R version 4.2.2 - -The `stplanr` package contains some data that we can use to demonstrate -principles in Data Science, illustrated in the Figure below. Source: -Chapter 1 of R for Data Science (Grolemund and Wickham 2016) [available -online](https://r4ds.had.co.nz/introduction.html). - -![](https://d33wubrfki0l68.cloudfront.net/571b056757d68e6df81a3e3853f54d3c76ad6efc/32d37/diagrams/data-science.png) - -First we will load some sample data: - -You can click on the data in the environment panel to view it or use -`head(od_data)` Now we will rename one of the columns from `foot` to -`walk` - -Next we will made a new dataset `od_data_walk` by taking `od_data` and -piping it (`%>%`) to `filter` the data frame to only include rows where -`walk > 0`. Then `select` a few of the columns and calculate two new -columns `proportion_walk` and `proportion_drive`. - -We can use the generic `plot` function to view the relationships between -variables - -``` r -plot(od_data_walk) -``` - -![](2-od_files/figure-gfm/unnamed-chunk-15-1.png) - -R has built in modelling functions such as `lm` lets make a simple model -to predict the proportion of people who walk based on the proportion of -people who drive. - -We can use the `ggplot2` package to graph our model predictions. - -``` r -ggplot(od_data_walk) + - geom_point(aes(proportion_drive, proportion_walk)) + - geom_line(aes(proportion_drive, proportion_walk_predicted)) -``` - -![](2-od_files/figure-gfm/unnamed-chunk-17-1.png) - -Exercises - -1. What is the class of the data in `od_data`? -2. 
Subset (filter) the data to only include OD pairs in which at least - one person (`> 0`) person walks (bonus: on what % of the OD pairs - does at least 1 person walk?) -3. Calculate the percentage who cycle in each OD pair in which at least - 1 person cycles -4. Is there a positive relationship between walking and cycling in the - data? -5. Bonus: use the function `od2line()` in to convert the OD dataset - into geographic desire lines - -# 4 Processing origin-destination data in Bristol - -This section is based on [Chapter 12 of Geocomputation with -R](https://geocompr.robinlovelace.net/transport.html). You should read -this chapter in full in your own time. - -We need the `stplanr` package which provides many useful functions for -transport analysis and `tmap` package which enables advanced mapping -features. - -``` r -library(stplanr) -library(tmap) -``` - -We will start by loading two datasets: - -``` r -od = spDataLarge::bristol_od -zones = spDataLarge::bristol_zones -``` - -Explore these datasets using the functions you have already learnt -(e.g. `head`,`nrow`). - -You will notice that the `od` datasets has shared id values with the -`zones` dataset. We can use these to make desire lines between each -zone. But first we must filter out trips that start and end in the same -zone. - -``` r -od_inter = filter(od, o != d) -desire_lines = od2line(od_inter, zones) -``` - -Let’s calculate the percentage of trips that are made by active travel - -``` r -desire_lines$Active = (desire_lines$bicycle + desire_lines$foot) / - desire_lines$all * 100 -``` - -Now use `tmap` to make a plot showing the number of trips and the -percentage of people using active travel. 
- -``` r -desire_lines = desire_lines[order(desire_lines$Active),] - -tm_shape(desire_lines) + # Define the data frame used to make the map - tm_lines(col = "Active", # We want to map lines, the colour (col) is based on the "Active" column - palette = "plasma", # Select a colour palette - alpha = 0.7, # Make lines slightly transparent - lwd = "all") + # The line width (lwd) is based on the "all" column - tm_layout(legend.outside = TRUE) + # Move the ledgend outside the map - tm_scale_bar() # Add a scale bar to the map -``` - -![](2-od_files/figure-gfm/unnamed-chunk-27-1.png) - -Now that we have geometry attached to our data we can calculate other -variables of interest. For example let’s calculate the distacne -travelled and see if it relates to the percentage of people who use -active travel. - -``` r -desire_lines$distance_direct_m = as.numeric(st_length(desire_lines)) -``` - -Note the use of `as.numeric` by default `st_length` and many other -functions return a special type of result with `unit`. Here we force the -results back into the basic R numerical value. But be careful! The units -you get back depend on the coordinate reference system, so check your -data before you assume what values mean. - -``` r -ggplot(desire_lines) + - geom_point(aes(x = distance_direct_m, y = Active, size = all)) + - geom_smooth(aes(x = distance_direct_m, y = Active)) -``` - -![](2-od_files/figure-gfm/unnamed-chunk-29-1.png) - -The blue line is a smoothed average of the data. It shows a common -concept in transport research, the distance decay curve. In this case it -shows that the longer the journey the less likely people are to use -active travel. But this concept applies to all kinds of travel -decisions. For example you are more likely to travel to a nearby coffee -shop than a far away coffee shop. Different types of trip have different -curves, but most people always have a bias for shorter trips. - -# 5 Homework - -1. 
Read Chapters 2-5 of [Geocomputation with - R](https://r.geocompx.org/transport.html) -2. Work through Sections 13.1 to 13.4 of the Transport Chapter in - [Geocomputation with R](https://r.geocompx.org/transport.html) -3. Bonus: Read more about using the [tmap - package](https://r-tmap.github.io/tmap/) -4. Bonus: Read more about the [ggplot2 - package](https://ggplot2.tidyverse.org/) -5. Bonus: Read Chapter 7 & 8 of [Geocomputation with - R](https://r.geocompx.org/transport.html) - -# 6 References - -
- -
- -Grolemund, Garrett, and Hadley Wickham. 2016. *R for Data Science*. -O’Reilly Media. - -
- -
diff --git a/practicals/2-od_files/figure-gfm/unnamed-chunk-17-1.png b/practicals/2-od_files/figure-gfm/unnamed-chunk-17-1.png deleted file mode 100644 index a8a627a..0000000 Binary files a/practicals/2-od_files/figure-gfm/unnamed-chunk-17-1.png and /dev/null differ diff --git a/practicals/2day-exercises.md b/practicals/2day-exercises.md deleted file mode 100644 index 6a4b677..0000000 --- a/practicals/2day-exercises.md +++ /dev/null @@ -1,100 +0,0 @@ -Transport Data Science with R Practical Exercises -================ - -These practicals aim to test your knowledge of material covered in the Transport Data Science module hosted at - -It will use the following CRAN packages: - -``` r -library(dplyr) -library(osmdata) -library(pct) -library(sf) -library(stplanr) -library(tmap) -``` - -How to use R/RStudio effectively -================================ - -- Check your packages are up-to-date with `update.packages()` -- Create an RStudio project with an appropriate name for this course (e.g. `TDSwithR`) -- Create a script called `set-up.R`, e.g. with **one** the following commands: - -``` r -file.edit("get-transport-data.R") -``` - -Using packages: example with sf/ggplot2 -======================================= - -Spatial data analysis -===================== - -Visualising spatial datasets -============================ - -stats19 data analysis - with spatial/temporal analysis -====================================================== - -1. Download and plot all crashes reported in Great Britain in 2017 (hint: see [the stats19 vignette](https://cran.r-project.org/web/packages/stats19/vignettes/stats19.html)) -2. Filter crashes that happened in West Yorkshire -3. Get and plot origin-destination data in `west-yorkshire` with the `pct` package hosted at: (bonus: look at the source code of `get_od()` and download the origin-destination data with `download.file()`) - -Bonus exercises ---------------- - -Identify a region and zonal units of interest from - -1. 
Read them into R as an `sf` object -2. Join-on data from a non-geographic object -3. Add a data access section to your in progress portfolio -4. Get origin-destination data from Uber - -OD data with stplanr -==================== - -1. Create an object representing desire lines in West Yorkshire, e.g, with: `desire_lines_all = pct::get_pct_lines(region = "west-yorkshire")` -2. Get data from Leeds and subset the desire lines with a value of `all` of 200 or above -3. Create a buffer of 500 m for each desire line and calculate the number of crashes that happened within each (using STATS19 data downloaded in the previous exercise) -4. Create a facetted plot showing the temporal distribution of crashes in West Yorksire (you can choose whether to show these over the months of the year, over days of the week, or over the hours of a day) -5. Do a spatio-temporal subset to identify the crashes that happened within the most commonly travelled desire line between 07:00 and 10:00 during weekdays. - -Accessing crowd-sourced data from OSM -------------------------------------- - -- Type code into the script created in the previous section so that it can reproduce this plot: - -``` r -location = opq("leeds") %>% - add_osm_feature(key = "railway", value = "station") %>% - osmdata_sf() -station_points = location$osm_points["name"] -tm_shape(location$osm_polygons) + - tm_polygons(col = "red") + - tm_shape(station_points) + - tm_dots(col = "red") + - tm_text(text = "name", size = 1) -``` - - - -1. Download cycleway data with the tag highway=cycleway for Leeds from -2. Load the data in R and plot it with `tmap` (bonus: now try to get the same data using the **osmdata** package) - -Local route network analysis -============================ - -See exercises in lecture slides. - -Data and methods for assessing cycling potential -================================================ - -1. 
Identify the top 10 desire lines in West Yorkshire along which at least 100 people travel to work, by: - - The percentage who walk - - The percentage who cycle - - Bonus: Find the top 10 for *all* modes of transport and plot the results -2. Download origin-destination data from 2011 Census using the function `pct::get_od()`. -3. Convert these origin-destination pairs into geographic desire lines between centroids in Leeds (e.g. as generated by the function `pct::get_pct_centroids()`) and plot the result. -4. Find the route along the most travelled desire line in Leeds and plot the result. -5. Get cycle route data for West Yorkshire and use the function `overline2()` to identify the routes along which most people walk to work. diff --git a/practicals/2day.Rmd b/practicals/2day.Rmd deleted file mode 100644 index cc55aed..0000000 --- a/practicals/2day.Rmd +++ /dev/null @@ -1,160 +0,0 @@ ---- -output: - github_document - # pdf_document: - # number_sections: true -title: Transport Data Science with R Practical Exercises ---- - -These practicals aim to test your knowledge of material covered in the Transport Data Science module hosted at https://github.com/ITSLeeds/TDS - -# Project set-up - -- Check your packages are up-to-date with `update.packages()` -- Create an RStudio project with an appropriate name for this course (e.g. `TDSwithR`) -- Create a script called `set-up.R`, e.g. 
with **one** the following commands: - -```r -file.edit("get-transport-data.R") -``` - - -- Create an interactive map showing the course location with the following commands (you should see the map shown below): - -```{r} - - - -# test ability to install packages -install.packages("remotes") - -# install packages we'll use (remotes is more efficient at installing them) -pkgs = c( - "pct", - "stats19", - "stplanr", - "tidyverse", - "sf", - "tmap", - "dodgr", - "osmdata", - "pbapply" -) -``` - -```{r, eval=FALSE} -remotes::install_cran(pkgs) -``` - - - -```{r} - -# load the pkgs -lapply(pkgs, library, character.only = TRUE) -tmap_mode("plot") # use "view" for interactive maps - -# Test link with osmdata works: -osm_data = opq(bbox = "isle-of-wight") %>% - add_osm_feature(key = "name", value = "Newport Road") %>% - osmdata_sf() -horseferry_road = osm_data$osm_lines - -qtm(horseferry_road) -horseferry_region = horseferry_road %>% - st_transform(27700) %>% - st_buffer(500) %>% - st_union() %>% - st_transform(4326) - -# Test stats19 data downloads -a = get_stats19(year = 2017, type = "acc", ask = FALSE) -asf = format_sf(a, lonlat = TRUE) -horseferry_crashes = asf[horseferry_region, ] -plot(horseferry_crashes) - -# Test pct data downloads -rnet = get_pct_rnet(region = "london") -horseferry_routenet = rnet[horseferry_region, ] - -# Final combined plot -tm_shape(horseferry_region) + - tm_borders() + - tm_shape(horseferry_road) + - tm_lines("red", scale = 9) + - tm_shape(horseferry_routenet) + - tm_lines(lwd = "bicycle", scale = 9, col = "blue") + - tm_shape(horseferry_crashes) + - tm_dots("accident_severity", size = 0.5, alpha = 0.5, palette = "magma") -``` - -# Getting transport data - -## Accessing crowd-sourced data from OSM - -- Type code into the script created in the previous section so that it can reproduce this plot: - -```{r, message=FALSE, warning=FALSE, out.height="5cm"} -location = opq("leeds") %>% - add_osm_feature(key = "railway", value = "station") %>% - 
osmdata_sf() -station_points = location$osm_points["name"] -tm_shape(location$osm_polygons) + - tm_polygons(col = "red") + - tm_shape(station_points) + - tm_dots(col = "red") + - tm_text(text = "name", size = 1) -``` - -```{r, echo=FALSE, eval=FALSE} -write_sf(station_points, "station_points.gpkg") -piggyback::pb_upload("station_points.gpkg") -``` - -1. Download cycleway data with the tag highway=cycleway for Leeds from https://overpass-turbo.eu/ -1. Load the data in R and plot it with `tmap` (bonus: now try to get the same data using the **osmdata** package) - - -## Get official data with stats19 and pct packages - -1. Download and plot all crashes reported in Great Britain in 2017 (hint: see [the stats19 vignette](https://cran.r-project.org/web/packages/stats19/vignettes/stats19.html)) -1. Filter crashes that happened in West Yorkshire -1. Get and plot origin-destination data in `west-yorkshire` with the `pct` package hosted at: https://github.com/ITSLeeds/pct (bonus: look at the source code of `get_od()` and download the origin-destination data with `download.file()`) - -## Bonus exercises - -Identify a region and zonal units of interest from http://geoportal.statistics.gov.uk/ - -1. Read them into R as an `sf` object -1. Join-on data from a non-geographic object -1. Add a data access section to your in progress portfolio -1. Get origin-destination data from Uber - -# Working with spatio-temporal data - -1. Create an object representing desire lines in West Yorkshire, e.g, with: `desire_lines_all = pct::get_pct_lines(region = "west-yorkshire")` -1. Get data from Leeds and subset the desire lines with a value of `all` of 200 or above -1. Create a buffer of 500 m for each desire line and calculate the number of crashes that happened within each (using STATS19 data downloaded in the previous exercise) -1. 
Create a facetted plot showing the temporal distribution of crashes in West Yorkshire (you can choose whether to show these over the months of the year, over days of the week, or over the hours of a day) -1. Do a spatio-temporal subset to identify the crashes that happened within the most commonly travelled desire line between 07:00 and 10:00 during weekdays. - -```{r, echo=FALSE} -desire_lines_all = pct::get_pct_lines(region = "west-yorkshire") - -``` - -# Traffic data and pollution analysis with R - -See exercises in lecture slides. - -# From desire lines to route networks - -1. Identify the top 10 desire lines in West Yorkshire along which at least 100 people travel to work, by: - - The percentage who walk - - The percentage who cycle - - Bonus: Find the top 10 for *all* modes of transport and plot the results -1. Download origin-destination data from 2011 Census using the function `pct::get_od()`. -1. Convert these origin-destination pairs into geographic desire lines between centroids in Leeds (e.g. as generated by the function `pct::get_pct_centroids()`) and plot the result. -1. Find the route along the most travelled desire line in Leeds and plot the result. -1. Get cycle route data for West Yorkshire and use the function `overline2()` to identify the routes along which most people walk to work. 
- diff --git a/practicals/2day_files/figure-gfm/unnamed-chunk-19-1.png b/practicals/2day_files/figure-gfm/unnamed-chunk-19-1.png deleted file mode 100644 index 1d17f51..0000000 Binary files a/practicals/2day_files/figure-gfm/unnamed-chunk-19-1.png and /dev/null differ diff --git a/practicals/2day_files/figure-markdown_github/unnamed-chunk-19-1.png b/practicals/2day_files/figure-markdown_github/unnamed-chunk-19-1.png deleted file mode 100644 index c1e313e..0000000 Binary files a/practicals/2day_files/figure-markdown_github/unnamed-chunk-19-1.png and /dev/null differ diff --git a/practicals/3-data-structures.Rmd b/practicals/3-data-structures.Rmd deleted file mode 100644 index c38e88e..0000000 --- a/practicals/3-data-structures.Rmd +++ /dev/null @@ -1,278 +0,0 @@ ---- -title: "Data structures" -subtitle: '
Practical' -author: "Robin Lovelace" -date: 'University of Leeds `r # Sys.Date()()`
' -output: github_document -bibliography: ../tds.bib ---- - -```{r, include=FALSE} -knitr::opts_chunk$set(out.width = "50%", warning = FALSE) -library(tidyverse) -``` - -## Agenda - -- Thinking about Transport Data Science and what you want to get out of it (20 minutes) - -- Questions on the homework (20 minutes) - -- Reading-in transport datasets (20 minutes) - - I will do a live demo - -- Break - -- Getting started with the data structures practical (20 min) - - We will get started with the data together - -- Data Structures practical + getting help (1 hour) - -## What do you want to get out of Transport Data Science? - -- Specifically which skills? - - Data cleaning - - Visualisation - - Modelling -- What do you want to do with those skills? - - Commercial - - Public interest - - Educational - - Other - - All of the above -- What kinds of data do you want to be working with? - - Commercial - - Open - - Big data - - Accurate data - - All the above -- Where do you see your career in 5 years from now? - - -## Review of homework exercise: demo then individual Q&A - -**Note**: Ensure that you have the necessary packages installed. 
-If you do not, you can install them as follows: - -```{r, eval=FALSE} -install.packages("pct") # install the pct package -``` - -```{r} -library(tidyverse) -library(tmap) -tmap_mode("view") -url_locations = "https://github.com/ITSLeeds/TDS/raw/master/messages/locations.csv" -locations = read_csv(url_locations) -locations_sf = locations %>% - filter(!is.na(lon) & !is.na(lat)) %>% - sf::st_as_sf(coords = c("lon", "lat")) -tm_shape(locations_sf) + - tm_dots("name") -``` - -### Work through practical and the example with Bristol (~60 minutes) - -See https://github.com/ITSLeeds/TDS/blob/master/practicals/2-software.md - -```{r} - -``` - -### Think about research questions and data for the coursework (~30 minutes) - -See - -```{r} -# Ctl+Enter -# import process -u = "https://github.com/ITSLeeds/TDS/raw/master/messages/locations.csv" -locations = read_csv(file = u) -``` - -```{r} -library(tidyverse) -# tidy data -locations = locations %>% - mutate(n_coffee_per_day = n_coffee / 7) -# sanity check on the data -# type in Tab to autocomplete -mean(locations$n_coffee_per_day, na.rm = TRUE) -``` - -```{r} -locations -plot(locations) -plot(locations$lon, locations$lat) -``` - -```{r} -locations_sf = locations %>% - filter(!is.na(lat)) %>% - sf::st_as_sf(coords = c("lon", "lat")) -# visualise -tm_shape(locations_sf) + - tm_dots() -``` - - - - -## Practical demo of zones - -- Get data on the zones in West Yorkshire with the following command: - -```{r} -zones = pct::get_pct_zones(region = "west-yorkshire") -``` - -- Find out the class, names, number of columns and number of rows in the `zones` dataset using functions such as `names()`, `nrow()` and `ncol()`. - -```{r, echo=FALSE, eval=FALSE} -names(zones) -``` - -- Plot the number of car trips and walking trips as follows, what do you notice about the results? Where do you think there is most potential to increase walking levels? 
- -```{r, eval=FALSE} -plot(zones["car_driver"]) -plot(zones["foot"]) -``` - -- How would you select only the `car_driver` column in the zones object in the tidyverse? Hint it would begin with the following (incomplete) lines: - -```{r, eval=FALSE} -library(tidyverse) -zones %>% select( -``` - -- Create a new object called `zones_active_modes` that only contains the `bicycle` and `foot` attribute columns. Plot it (the results should look like those below). - -```{r, echo=FALSE} -zones_active_modes = zones %>% select(bicycle, foot) -plot(zones_active_modes) -``` - -- Which zone has the highest level of cycling, and where is it? - -- Use the function `filter()` - -### OD Data - - - - -### Desire lines - -- Read-in top 1000 desire lines for Leeds with the following code (hint: rather than typing the url of the file you can copy-paste it from [github.com/ITSLeeds/TDS](https://github.com/ITSLeeds/TDS)): - -```{r, message=FALSE} -library(dplyr) -library(sf) -u = "https://github.com/ITSLeeds/TDS/releases/download/0.1/desire_lines.geojson" - -download.file(u, "desire_lines.geojson") -desire_lines = read_sf("desire_lines.geojson") -# note: you can also read-in the file from the url: -# desire_lines = read_sf(u) -``` - -Plot the lines statically as follows: - -```{r} -library(tmap) -tm_shape(desire_lines) + - tm_lines() -``` - -Plot the lines showing the number of car drivers as follows: - -```{r} -tm_shape(desire_lines) + - tm_lines(col = "car_driver") -``` - -- Plot the same lines, but with colour according to the number of people who walked to work in the 2011 Census - -```{r, include=FALSE} -tm_shape(desire_lines) + - tm_lines(col = "foot") -``` - -- Re-do the plot of the number of trips made by driving, but make the line widths proportional to the total number (`all`) trips made (hint: you may need to set the scale with `scale = 5`, or another number greater than 1, for example) - -```{r, include=FALSE} -tm_shape(desire_lines) + - tm_lines(col = "foot", lwd = "all", 
scale = 9) -``` - -Filter-out all lines between 1 and 3km and call the resulting object`desire_lines_1_3km` with the following command (or similar): - -```{r} -desire_lines_1_3km = desire_lines %>% - filter(e_dist_km > 1 & e_dist_km < 3) -``` - -- Plot the results to make sure the operation worked (you should get a result like the on below): - -```{r, echo=FALSE} -plot(desire_lines_1_3km$geometry) -``` - -Create a new variable called `percent_drive` that contains the percentage of trips driven in each of the lines in the `desire_lines_1_3km` object with the following command: - -```{r} -desire_lines_pcar = desire_lines_1_3km %>% - mutate(percent_drive = car_driver / all * 100) -``` - -- Find the top 100 most 'car dependent' short desire lines in West Yorkshire and plot the results. It should look something like this: - -```{r, echo=FALSE} -car_dep_100 = desire_lines_pcar %>% - top_n(n = 100, wt = percent_drive) -tm_shape(car_dep_100) + - tm_lines(col = "percent_drive", lwd = "all", scale = 5) -``` - -- Plot the results in an interactive map and explore the results. Where are the top 100 most car-dependent major commuting desire lines in West Yorkshire (hint: you may use the `ttm()` function to switch to interactive mode in **tmap**)? - - -```{r, echo=FALSE} -ttm() -tm_shape(car_dep_100) + - tm_lines(col = "percent_drive", lwd = "all", scale = 5) -``` - -## Homework - -- Work through Chapter 12 of Geocomputation with R on Transport - https://geocompr.robinlovelace.net/transport.html -- Save your workings in an R script - -Bonus 1 Complete exercise 1 (not bonus) - -Bonus 1 (non technical): answer question 3 - -Bonus 2 (technical): can you reproduce the results for Leeds? 
This starting point may be useful: - - -```{r, eval=FALSE} -region = "west-yorkshire" -b = "https://github.com/npct/pct-outputs-regional-notR/raw" -u = paste0("/master/commute/msoa/", region) -u_od = paste0(b, u, "/od_attributes.csv") -od = readr::read_csv(u_od) -z = sf::read_sf(paste0(b, u, "/z.geojson")) -cents = sf::read_sf(paste0(b, u, "/c.geojson")) -od_clean = od %>% - select(-id) %>% - filter(geo_code1 %in% cents$geo_code) %>% - filter(geo_code2 %in% cents$geo_code) - - -desire_lines = stplanr::od2line(flow = od_clean, cents) -tm_shape(desire_lines$geometry[1:99]) %>% - tm_lines() -``` - - diff --git a/practicals/3-data-structures.md b/practicals/3-data-structures.md deleted file mode 100644 index b8783c2..0000000 --- a/practicals/3-data-structures.md +++ /dev/null @@ -1,336 +0,0 @@ -Data structures -================ -Robin Lovelace -University of Leeds -
- -## Agenda - -- Thinking about Transport Data Science and what you want to get out - of it (20 minutes) - -- Questions on the homework (20 minutes) - -- Reading-in transport datasets (20 minutes) - - - I will do a live demo - -- Break - -- Getting started with the data structures practical (20 min) - - - We will get started with the data together - -- Data Structures practical + getting help (1 hour) - -## What do you want to get out of Transport Data Science? - -- Specifically which skills? - - Data cleaning - - Visualisation - - Modelling -- What do you want to do with those skills? - - Commercial - - Public interest - - Educational - - Other - - All of the above -- What kinds of data do you want to be working with? - - Commercial - - Open - - Big data - - Accurate data - - All the above -- Where do you see your career in 5 years from now? - -## Review of homework exercise: demo then individual Q&A - -**Note**: Ensure that you have the necessary packages installed. If you -do not, you can install them as follows: - -``` r -install.packages("pct") # install the pct package -``` - -``` r -library(tidyverse) -library(tmap) -tmap_mode("view") -``` - - ## tmap mode set to interactive viewing - -``` r -url_locations = "https://github.com/ITSLeeds/TDS/raw/master/messages/locations.csv" -locations = read_csv(url_locations) -``` - - ## - ## ── Column specification ──────────────────────────────────────────────────────── - ## cols( - ## name = col_character(), - ## lon = col_double(), - ## lat = col_double(), - ## n_coffee = col_double(), - ## favourite_mode = col_character() - ## ) - -``` r -locations_sf = locations %>% - filter(!is.na(lon) & !is.na(lat)) %>% - sf::st_as_sf(coords = c("lon", "lat")) -tm_shape(locations_sf) + - tm_dots("name") -``` - - ## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 7.0.0 - - - -### Work through practical and the example with Bristol (\~60 minutes) - -See - - -### Think about research questions and data for the coursework (\~30 minutes) - 
-See - -``` r -# Ctl+Enter -# import process -u = "https://github.com/ITSLeeds/TDS/raw/master/messages/locations.csv" -locations = read_csv(file = u) -``` - - ## - ## ── Column specification ──────────────────────────────────────────────────────── - ## cols( - ## name = col_character(), - ## lon = col_double(), - ## lat = col_double(), - ## n_coffee = col_double(), - ## favourite_mode = col_character() - ## ) - -``` r -library(tidyverse) -# tidy data -locations = locations %>% - mutate(n_coffee_per_day = n_coffee / 7) -# sanity check on the data -# type in Tab to autocomplete -mean(locations$n_coffee_per_day, na.rm = TRUE) -``` - - ## [1] 0.2857143 - -``` r -locations -``` - - ## # A tibble: 19 x 6 - ## name lon lat n_coffee favourite_mode n_coffee_per_day - ## - ## 1 rl -1.5 53.8 5 walk 0.714 - ## 2 mb 16.4 48.2 2 walk 0.286 - ## 3 ct -1.5 53.8 3 bicycle 0.429 - ## 4 mm NA NA NA NA - ## 5 jt -1.6 53.8 1 climb 0.143 - ## 6 ea NA NA NA NA - ## 7 na NA NA NA NA - ## 8 bi NA NA NA NA - ## 9 mb NA NA NA NA - ## 10 ab -1.5 53.8 1 walk 0.143 - ## 11 ld NA NA NA NA - ## 12 rf NA NA NA NA - ## 13 dm NA NA NA NA - ## 14 sp NA NA NA NA - ## 15 mp NA NA NA NA - ## 16 ot -1.5 53.8 4 walk 0.571 - ## 17 yw 113. 23.1 0 subway 0 - ## 18 sw NA NA NA NA - ## 19 bz 114. 22.3 0 walk 0 - -``` r -plot(locations) -``` - - - -``` r -plot(locations$lon, locations$lat) -``` - - - -``` r -locations_sf = locations %>% - filter(!is.na(lat)) %>% - sf::st_as_sf(coords = c("lon", "lat")) -# visualise -tm_shape(locations_sf) + - tm_dots() -``` - - - -## Practical demo of zones - -- Get data on the zones in West Yorkshire with the following command: - -``` r -zones = pct::get_pct_zones(region = "west-yorkshire") -``` - -- Find out the class, names, number of columns and number of rows in - the `zones` dataset using functions such as `names()`, `nrow()` and - `ncol()`. - -- Plot the number of car trips and walking trips as follows, what do - you notice about the results? 
Where do you think there is most - potential to increase walking levels? - -``` r -plot(zones["car_driver"]) -plot(zones["foot"]) -``` - -- How would you select only the `car_driver` column in the zones - object in the tidyverse? Hint it would begin with the following - (incomplete) lines: - -``` r -library(tidyverse) -zones %>% select( -``` - -- Create a new object called `zones_active_modes` that only contains - the `bicycle` and `foot` attribute columns. Plot it (the results - should look like those below). - - - -- Which zone has the highest level of cycling, and where is it? - -- Use the function `filter()` - -### OD Data - -### Desire lines - -- Read-in top 1000 desire lines for Leeds with the following code - (hint: rather than typing the url of the file you can copy-paste it - from [github.com/ITSLeeds/TDS](https://github.com/ITSLeeds/TDS)): - -``` r -library(dplyr) -library(sf) -u = "https://github.com/ITSLeeds/TDS/releases/download/0.1/desire_lines.geojson" - -download.file(u, "desire_lines.geojson") -desire_lines = read_sf("desire_lines.geojson") -# note: you can also read-in the file from the url: -# desire_lines = read_sf(u) -``` - -Plot the lines statically as follows: - -``` r -library(tmap) -tm_shape(desire_lines) + - tm_lines() -``` - - - -Plot the lines showing the number of car drivers as follows: - -``` r -tm_shape(desire_lines) + - tm_lines(col = "car_driver") -``` - - - -- Plot the same lines, but with colour according to the number of - people who walked to work in the 2011 Census - -- Re-do the plot of the number of trips made by driving, but make the - line widths proportional to the total number (`all`) trips made - (hint: you may need to set the scale with `scale = 5`, or another - number greater than 1, for example) - -Filter-out all lines between 1 and 3km and call the resulting -object`desire_lines_1_5km` with the following command (or similar): - -``` r -desire_lines_1_5km = desire_lines %>% - filter(e_dist_km > 1 & e_dist_km < 3) 
-``` - -- Plot the results to make sure the operation worked (you should get a - result like the on below): - - - -Create a new variable called `percent_drive` that contains the -percentage of trips driven in each of the lines in the -`desire_lines_1_5km` object with the following command: - -``` r -desire_lines_pcar = desire_lines %>% - mutate(percent_drive = car_driver / all * 100) -``` - -- Find the top 100 most ‘car dependent’ short desire lines in West - Yorkshire and plot the results. It should look something like this: - - - - ## Legend for line widths not available in view mode. - - - -- Plot the results in an interactive map and explore the results. - Where are the top 100 most car-dependent major commuting desire - lines in West Yorkshire (hint: you may use the `ttm()` function to - switch to interactive mode in **tmap**)? - - - - ## tmap mode set to plotting - - - -## Homework - -- Work through Chapter 12 of Geocomputation with R on Transport - - -- Save your workings in an R script - -Bonus 1 Complete exercise 1 (not bonus) - -Bonus 1 (non technical): answer question 3 - -Bonus 2 (technical): can you reproduce the results for Leeds? 
This -starting point may be useful: - -``` r -region = "west-yorkshire" -b = "https://github.com/npct/pct-outputs-regional-notR/raw" -u = paste0("/master/commute/msoa/", region) -u_od = paste0(b, u, "/od_attributes.csv") -od = readr::read_csv(u_od) -z = sf::read_sf(paste0(b, u, "/z.geojson")) -cents = sf::read_sf(paste0(b, u, "/c.geojson")) -od_clean = od %>% - select(-id) %>% - filter(geo_code1 %in% cents$geo_code) %>% - filter(geo_code2 %in% cents$geo_code) - - -desire_lines = stplanr::od2line(flow = od_clean, cents) -tm_shape(desire_lines$geometry[1:99]) %>% - tm_lines() -``` diff --git a/practicals/3-data-structures_files/figure-gfm/unnamed-chunk-7-2.png b/practicals/3-data-structures_files/figure-gfm/unnamed-chunk-7-2.png deleted file mode 100644 index f575761..0000000 Binary files a/practicals/3-data-structures_files/figure-gfm/unnamed-chunk-7-2.png and /dev/null differ diff --git a/practicals/4-cleaning.Rmd b/practicals/4-cleaning.Rmd deleted file mode 100644 index a2393b7..0000000 --- a/practicals/4-cleaning.Rmd +++ /dev/null @@ -1,119 +0,0 @@ ---- -title: "Data structures" -subtitle: '
Practical' -author: "Robin Lovelace" -date: 'University of Leeds `r # Sys.Date()()`
' -output: github_document -bibliography: ../tds.bib ---- - -## Review of homework exercise: demo then individual Q&A - -```{r, message=FALSE, warning=FALSE} -library(tidyverse) -library(sf) -``` - -## Simple data cleaning - -**Exercises** Try evaluating these lines of code, what goes wrong how could you fix them? IS the only one "correct" solution discuss in pairs how you would approach different types of data cleaning. - -```{r, message=FALSE, eval=FALSE} -as.numeric(c("1","2.2","3,3")) -as.numeric(c("1","","3.3")) -sum(c(3,4,NA)) -mean(c(3,4,NA)) -max(c(3,4,NA)) -d1 <- as.Date("31/11/2020") -d2 <- Sys.Date() -difftime(d2, d1) -NULL == NA -is.logical(NA) -is.null(NA) -is.na(NULL) -is.numeric("1") -is.numeric(1,2) -anyNA(c(6,2,NA)) -1:3 -1:1 -1:0 -seq_len(0) -seq_len(3) -0.1 + 0.2 - 0.3 == 0 - -``` - -## Data cleaning on a big dataset - -Download the file `wu03uk_v3.zip` from the Wicid website: -[wicid.ukdataservice.ac.uk](http://wicid.ukdataservice.ac.uk/cider/wicid/downloads.php). -You should be able to read it in as follows: - -```{r, message=FALSE, eval=FALSE} -unzip("~/Downloads/wu03uk_v3.zip") -d = read_csv("wu03uk_v3.csv") -``` - -Load data representing MSOA zones in Isle of Wight. -You could download zones from a number of places, including https://www.ukdataservice.ac.uk/get-data/geography.aspx - -An easy way to get zone data for West-Yorkshire is with the `pct` package, which can be installed as follows: - -The zones for Isle of Wight can be shown as follows. - -```{r, message=FALSE} -zones = pct::get_pct_zones(region = "isle-of-wight", geography = "msoa") -plot(zones$geometry) -``` - -## Processing/cleaning - -- Clean the names of the `d` object, e.g. 
using the following commands: - -```{r, eval=FALSE, message=FALSE} -names(d) = snakecase::to_snake_case(names(d)) -names(d)[5] = "metro" -``` - -- Create a new variable called `pcycle` representing the percentage cycling in each OD pair - -- Create a minimal version of the dataset `d` only containing a few key variables - -- What proportion of people in England and Wales are represented in the dataset `d` - -- Create a subset of the object `d` called `d_iow_origins` that only contains routes that originate in Isle of Wight - -```{r, eval=FALSE, echo=FALSE} -head(zones$geo_code) -head(d$area_of_residence) - -summary(d$area_of_residence %in% zones$geo_code) - -d_iow_origins = d %>% - filter(area_of_residence %in% zones$geo_code) %>% - filter(area_of_workplace %in% zones$geo_code) -``` - -- Create a subset that contains only od pairs with origins and destinations in Isle of Wight - -**Bonus: Convert the origin-destination data you have of Isle of Wight into desire lines**, e.g. with: - -```{r, eval=FALSE} -desire_lines = stplanr::od2line(flow = d_iow_origins, zones) -desire_top = desire_lines %>% top_n(100, bicycle) -plot(desire_top) -``` - -```{r, eval=FALSE} -mapview::mapview(desire_top) -``` - -## Working on your own datset / project portfolio - -- Identify, download and clean a dataset to include in your coursework portfolio .Rmd document - -## Homework - -- Consolidate your knowledge of data cleaning by adding reproducible data cleaning code to your in-progress project portfolio. - - diff --git a/practicals/4-cleaning.md b/practicals/4-cleaning.md deleted file mode 100644 index ddcedb9..0000000 --- a/practicals/4-cleaning.md +++ /dev/null @@ -1,117 +0,0 @@ -Data structures -================ -Robin Lovelace -University of Leeds -
- -## Review of homework exercise: demo then individual Q&A - -``` r -library(tidyverse) -library(sf) -``` - -## Simple data cleaning - -**Exercises** Try evaluating these lines of code, what goes wrong how -could you fix them? IS the only one “correct” solution discuss in pairs -how you would approach different types of data cleaning. - -``` r -as.numeric(c("1","2.2","3,3")) -as.numeric(c("1","","3.3")) -sum(c(3,4,NA)) -mean(c(3,4,NA)) -max(c(3,4,NA)) -d1 <- as.Date("31/11/2020") -d2 <- Sys.Date() -difftime(d2, d1) -NULL == NA -is.logical(NA) -is.null(NA) -is.na(NULL) -is.numeric("1") -is.numeric(1,2) -anyNA(c(6,2,NA)) -1:3 -1:1 -1:0 -seq_len(0) -seq_len(3) -0.1 + 0.2 - 0.3 == 0 -``` - -## Data cleaning on a big dataset - -Download the file `wu03uk_v3.zip` from the Wicid website: -[wicid.ukdataservice.ac.uk](http://wicid.ukdataservice.ac.uk/cider/wicid/downloads.php). -You should be able to read it in as follows: - -``` r -unzip("~/Downloads/wu03uk_v3.zip") -d = read_csv("wu03uk_v3.csv") -``` - -Load data representing MSOA zones in Isle of Wight. You could download -zones from a number of places, including - - -An easy way to get zone data for West-Yorkshire is with the `pct` -package, which can be installed as follows: - -The zones for Isle of Wight can be shown as follows. - -``` r -zones = pct::get_pct_zones(region = "isle-of-wight", geography = "msoa") -plot(zones$geometry) -``` - -![](4-cleaning_files/figure-gfm/unnamed-chunk-4-1.png) - -## Processing/cleaning - -- Clean the names of the `d` object, e.g. 
using the following - commands: - -``` r -names(d) = snakecase::to_snake_case(names(d)) -names(d)[5] = "metro" -``` - -- Create a new variable called `pcycle` representing the percentage - cycling in each OD pair - -- Create a minimal version of the dataset `d` only containing a few - key variables - -- What proportion of people in England and Wales are represented in - the dataset `d` - -- Create a subset of the object `d` called `d_iow_origins` that only - contains routes that originate in Isle of Wight - -- Create a subset that contains only od pairs with origins and - destinations in Isle of Wight - -**Bonus: Convert the origin-destination data you have of Isle of Wight -into desire lines**, e.g. with: - -``` r -desire_lines = stplanr::od2line(flow = d_iow_origins, zones) -desire_top = desire_lines %>% top_n(100, bicycle) -plot(desire_top) -``` - -``` r -mapview::mapview(desire_top) -``` - -## Working on your own datset / project portfolio - -- Identify, download and clean a dataset to include in your coursework - portfolio .Rmd document - -## Homework - -- Consolidate your knowledge of data cleaning by adding reproducible - data cleaning code to your in-progress project portfolio. diff --git a/practicals/5-web.Rmd b/practicals/5-web.Rmd deleted file mode 100644 index 1f8c232..0000000 --- a/practicals/5-web.Rmd +++ /dev/null @@ -1,238 +0,0 @@ ---- -title: "Accessing data from web sources and data cleaning" -subtitle: '
Practical' -author: "Robin Lovelace" -date: 'University of Leeds `r # Sys.Date()()`
' -output: github_document -bibliography: ../tds.bib ---- - -```{r, include=FALSE} -knitr::opts_chunk$set(message = FALSE) -``` - -```{r, echo=FALSE, eval=FALSE} -# print pdf -pagedown::chrome_print("practicals/5-web.md") -pagedown::chrome_print("practicals/5-web.html") -browseURL("practicals/5-web.pdf") -piggyback::pb_upload("practicals/5-web.pdf") -piggyback::pb_download_url("practicals/5-web.pdf") -``` - -## Review of homework exercise: demo with RMarkdown then individual Q&A - -We will be using these packages in this practical: - -```{r, eval=FALSE} -library(sf) -library(stats19) -library(tidyverse) -``` - -## Accessing crowd-sourced data from OSM - -- Navigate to https://overpass-turbo.eu/ and play with the interface to see what data is available from OpenStreetMap. Download data on highway=cycleway for Leeds from https://overpass-turbo.eu/ - -- Load the data in R and plot it with your favourite plotting package (e.g. `sf`, `mapview` or `tmap`) - -- Bonus: now try to get the same data using the **osmdata** package - -## Get official data with stats19 - -- Take a read of the stats19 README page and at least one of the articles on it here: https://docs.ropensci.org/stats19/ -- Install and load the stats19 package as with one of the following commands: - -```{r, eval=FALSE} -install.packages("stats19") # the stable version -# remotes::install_github("ropensci/stats19") # the most recent 'development' version -``` - -- Show crashes involving pedestrians in Manchester by reproducing the following lines of code: - -```{r} -library(sf) -library(stats19) -library(tidyverse) -crashes_2018 = get_stats19(year = 2018) -crashes_2018_sf = format_sf(crashes_2018) -crashes_manchester = crashes_2018_sf %>% filter(local_authority_district == "Manchester") -plot(crashes_manchester["accident_severity"]) -casualties_2018 = get_stats19(year = 2018, type = "cas") -crashes_manchester = inner_join(crashes_manchester, casualties_2018) -pedestrian_casualties = crashes_manchester %>% 
filter(casualty_type == "Pedestrian") -plot(pedestrian_casualties["accident_severity"]) -``` - -- Use the tmap package to create an interactive map of pedestrian casualties in Manchester, starting with the following commands (hint, use `tmaptools::palette_explorer()` and the argument `palette = "Reds"` in the function `tm_dots()`, for example, to change the default colour palette): - -```{r} -library(tmap) -tmap_mode("plot") -``` - -```{r, echo=FALSE} -tm_shape(pedestrian_casualties) + - tm_dots("accident_severity", palette = "RdYlBu") -``` - -- Based on the documentation at https://docs.ropensci.org/stats19/: - - - Download data on road crashes in Great Britain in 2018 - - Filter crashes that happened in Leeds - -- Bonus: make a map of pedestrian casualties in Leeds that shows the speed limit where pedestrians were hit. Explore the results in an interactive map. Where do you think the speed limit should be reduced based on this data? - -The result should look something like this: - -```{r} -crashes_leeds = crashes_2018_sf %>% filter(local_authority_district == "Leeds") -crashes_leeds = inner_join(crashes_leeds, casualties_2018) -pedestrian_casualties = crashes_leeds %>% filter(casualty_type == "Pedestrian") -tm_shape(pedestrian_casualties) + - tm_dots("speed_limit") -``` - -- Bonus: what is the relationship between crash severity and the speed limit? 
- -- Bonus: download and visualise the Leeds Bradford Cycle Superhighway data with these commands: - -```{r, eval=FALSE} -library(osmdata) -data_osm = opq("leeds uk") %>% - add_osm_feature(key = "name", value = "Cycle Superhighway 1") %>% - osmdata_sf() -``` - -```{r, eval=FALSE, echo=FALSE} -saveRDS(data_osm, "data_osm_cycle_superhighway.Rds") -piggyback::pb_upload("data_osm_cycle_superhighway.Rds") -piggyback::pb_download_url("data_osm_cycle_superhighway.Rds") -``` - - -```{r} -# if the previous command fails, try: -data_osm = readRDS(url("https://github.com/ITSLeeds/TDS/releases/download/0.20.1/data_osm_cycle_superhighway.Rds")) -cycleway_100m_buffer = stplanr::geo_buffer(data_osm$osm_lines, dist = 100) -crashes_leeds_lon_lat = crashes_leeds %>% st_transform(4326) -crashes_near_cycle_superhighway = crashes_leeds_lon_lat[cycleway_100m_buffer, ] -``` - -```{r, eval=FALSE} -tm_shape(data_osm$osm_lines) + tm_lines() + - tm_shape(crashes_near_cycle_superhighway) + tm_dots("casualty_type") -``` - - -![](https://user-images.githubusercontent.com/1825120/154372076-b3b74387-a4e1-4574-a647-2d8b4a114fad.png) - - - Filter crashes that happened within a 500 m buffer of the cycle infrastructure - - Do cyclists seem safer on the cycle superhighway? - - Bonus: pull down origin-destination data with the `pct` package hosted at: https://github.com/ITSLeeds/pct - -## Get travel to work data with the PCT - -Use the `pct` package's inbuilt help to find out how to get data for West Yorkshire. 
-Hint: the code below gets zones for Leeds: - -```{r getzones} -library(pct) -head(pct::pct_regions) -# see all regions with View(pct_regions) -zones = get_pct_zones(region = "west-yorkshire") -zones_leeds = zones %>% - filter(lad_name == "Leeds") -zones_leeds %>% - select(bicycle:taxi_other) %>% - plot() -``` - -```{r, eval=FALSE} -?get_pct_routes_fast -``` - -Bonus (if you have time and a decent computer): download and import the 'car.zip' data from https://github.com/ITSLeeds/NTEM2OD/releases - -## Getting data from the web - -Read through Section [8.2](https://geocompr.robinlovelace.net/read-write.html#retrieving-data) and 8.3 of Geocomputation with R. - -Complete Excersises 4, 5, 6 and 7 of the chapter - -## Bonus 1: osmextract - -Take a read of the documentation for the [`osmextract`](https://docs.ropensci.org/osmextract/index.html) package. - -Reproduce the examples - -Get all supermarkets in OSM for West Yorkshire - -Identify all cycleways in West Yorkshire and, using the stats19 data you have already downloaded, identify all crashes that happened near them. - -## Bonus 2: Getting data from statistics.gov.uk - -Identify a region and zonal units of interest from http://geoportal.statistics.gov.uk/ - -- Read them into R as an `sf` object -- Join-on data from a non-geographic object - -- Add a data access section to your in progress portfolio - -## Bonus 3: find another geographic or transport dataset hosted online and write code to download it - -- Share the code on Teams or in a GitHub issue: https://github.com/ITSLeeds/TDS/issues - -## Some suggested links - - -### Boundary data -The list below provides links to some key data sources that may be of use and interest, starting with the most universal and easy to use, and ending in rather specific datasets. 
-- UK geoportal, providing geographic data at many levels: https://geoportal.statistics.gov.uk -- Other national geoportals exist, such as this: http://www.geoportal.org/ - - -### UK specific resources -Students can register with Edina Digimap. It is a service which will allow you to download UK topographic map data. -https://digimap.edina.ac.uk/webhelp/digimapsupport/about.htm#access/access_to_digimap.htm -UK census data can be accessed from several places including: https://census.edina.ac.uk/bds.html https://www.nomisweb.co.uk/census/2011 -https://www.nomisweb.co.uk/census/2011 - -The UK data service hosts a wide variety of spatial and survey data sets. An example is this recently released land use data -https://reshare.ukdataservice.ac.uk/854240/ -The Placed-Based Carbon Calculator has transport data about the UK with free downloads on the Data page. -https://www.carbon.place - -The University of Leeds is a partner in the Consumer Data Research Centre. Examples of data available include -Access to Healthy Assets & Hazards (AHAH) -https://data.cdrc.ac.uk/dataset/access-healthy-assets-hazards-ahah -And access to online grocery deliveries -https://www.cdrc.ac.uk/e-food-deserts/ -Some CDRC datasets are openly available and others are accessible to students and researchers. You can register with your university of Leeds account - - -### International resources -The United nations have a number of access points for spatial data. For example this for data relating to sustainable development goals: -https://unstats-undesa.opendata.arcgis.com/ -European Union data sets are available here -https://ec.europa.eu/eurostat/web/gisco/geodata - -There are other data sources and links to data. Here are some examples. -http://freegisdata.rtwilson.com/ Note this has a listing of several transport data sources http://www.gadm.org/ boundary datasets for administrative areas around the world. 
-https://www.gislounge.com/world-gis-data/ links to further resources -https://library.stanford.edu/research/stanford-geospatial-center/data Stanford University directory of data sources http://www.diva-gis.org/gdata available datasets by country. -(availability of these resources may change) - -### Some sources of transport data -Some government agencies publish data. Sometimes these are in spatial formats such as shapefiles. In other cases they are spreadsheets with zone identifiers which can be joined to spatial data. -• UK government transport data: https://ckan.publishing.service.gov.uk/publisher/department-for-transport - -This is an example of a site based in the USA. Most of the data is for the USA, but there are some international data sets. Some sites such as this require you to join or register to use the data. -• https://data.world/datasets/transportation - - - - - - - diff --git a/practicals/5-web.md b/practicals/5-web.md deleted file mode 100644 index 283a6a9..0000000 --- a/practicals/5-web.md +++ /dev/null @@ -1,215 +0,0 @@ -Accessing data from web sources and data cleaning -================ -Robin Lovelace -University of Leeds -
- -## Review of homework exercise: demo with RMarkdown then individual Q&A - -We will be using these packages in this practical: - -``` r -library(sf) -library(stats19) -library(tidyverse) -``` - -## Accessing crowd-sourced data from OSM - -- Navigate to and play with the interface - to see what data is available from OpenStreetMap. Download data on - highway=cycleway for Leeds from - -- Load the data in R and plot it with your favourite plotting package - (e.g. `sf`, `mapview` or `tmap`) - -- Bonus: now try to get the same data using the **osmdata** package - -## Get official data with stats19 - -- Take a read of the stats19 README page and at least one of the - articles on it here: -- Install and load the stats19 package as with one of the following - commands: - -``` r -install.packages("stats19") # the stable version -# remotes::install_github("ropensci/stats19") # the most recent 'development' version -``` - -- Show crashes involving pedestrians in Manchester by reproducing the - following lines of code: - -``` r -library(sf) -library(stats19) -library(tidyverse) -crashes_2018 = get_stats19(year = 2018) -crashes_2018_sf = format_sf(crashes_2018) -crashes_manchester = crashes_2018_sf %>% filter(local_authority_district == "Manchester") -plot(crashes_manchester["accident_severity"]) -``` - -![](5-web_files/figure-gfm/unnamed-chunk-5-1.png) - -``` r -casualties_2018 = get_stats19(year = 2018, type = "cas") -crashes_manchester = inner_join(crashes_manchester, casualties_2018) -pedestrian_casualties = crashes_manchester %>% filter(casualty_type == "Pedestrian") -plot(pedestrian_casualties["accident_severity"]) -``` - -![](5-web_files/figure-gfm/unnamed-chunk-5-2.png) - -- Use the tmap package to create an interactive map of pedestrian - casualties in Manchester, starting with the following commands - (hint, use `tmaptools::palette_explorer()` and the argument - `palette = "Reds"` in the function `tm_dots()`, for example, to - change the default colour palette): - 
-``` r -library(tmap) -tmap_mode("view") -``` - -![](5-web_files/figure-gfm/unnamed-chunk-7-1.png) - -- Based on the documentation at : - - - Download data on road crashes in Great Britain in 2018 - - Filter crashes that happened in Leeds - -- Bonus: make a map of pedestrian casualties in Leeds that shows the - speed limit where pedestrians were hit. Explore the results in an - interactive map. Where do you think the speed limit should be - reduced based on this data? - -The result should look something like this: - -``` r -crashes_leeds = crashes_2018_sf %>% filter(local_authority_district == "Leeds") -crashes_leeds = inner_join(crashes_leeds, casualties_2018) -pedestrian_casualties = crashes_leeds %>% filter(casualty_type == "Pedestrian") -tm_shape(pedestrian_casualties) + - tm_dots("speed_limit") -``` - -![](5-web_files/figure-gfm/unnamed-chunk-8-1.png) - -- Bonus: what is the relationship between crash severity and the speed - limit? - -- Bonus: download and visualise the Leeds Bradford Cycle Superhighway - data with these commands: - -``` r -library(osmdata) -data_osm = opq("leeds uk") %>% - add_osm_feature(key = "name", value = "Cycle Superhighway 1") %>% - osmdata_sf() -``` - -``` r -# if the previous command fails, try: -data_osm = readRDS(url("https://github.com/ITSLeeds/TDS/releases/download/0.20.1/data_osm_cycle_superhighway.Rds")) -cycleway_100m_buffer = stplanr::geo_buffer(data_osm$osm_lines, dist = 100) -``` - - ## Warning in fun(libname, pkgname): rgeos: versions of GEOS runtime 3.10.1-CAPI-1.16.0 - ## and GEOS at installation 3.9.1-CAPI-1.14.2differ - -``` r -crashes_leeds_lon_lat = crashes_leeds %>% st_transform(4326) -crashes_near_cycle_superhighway = crashes_leeds_lon_lat[cycleway_100m_buffer, ] -``` - -``` r -tm_shape(data_osm$osm_lines) + tm_lines() + - tm_shape(crashes_near_cycle_superhighway) + tm_dots("casualty_type") -``` - -![](https://user-images.githubusercontent.com/1825120/154372076-b3b74387-a4e1-4574-a647-2d8b4a114fad.png) - -- Filter 
crashes that happened within a 500 m buffer of the cycle - infrastructure -- Do cyclists seem safer on the cycle superhighway? -- Bonus: pull down origin-destination data with the `pct` package - hosted at: - -## Get travel to work data with the PCT - -Use the `pct` package’s inbuilt help to find out how to get data for -West Yorkshire. Hint: the code below gets zones for Leeds: - -``` r -library(pct) -head(pct::pct_regions) -``` - - ## Simple feature collection with 6 features and 1 field - ## Geometry type: MULTIPOLYGON - ## Dimension: XY - ## Bounding box: xmin: -3.193998 ymin: 51.28676 xmax: 0.3339015 ymax: 55.07939 - ## Geodetic CRS: WGS 84 - ## region_name geometry - ## 1 london MULTIPOLYGON (((0.2082447 5... - ## 2 greater-manchester MULTIPOLYGON (((-2.146328 5... - ## 3 liverpool-city-region MULTIPOLYGON (((-2.730525 5... - ## 4 south-yorkshire MULTIPOLYGON (((-1.822229 5... - ## 5 north-east MULTIPOLYGON (((-1.784972 5... - ## 6 west-midlands MULTIPOLYGON (((-1.788081 5... - -``` r -# see all regions with View(pct_regions) -zones = get_pct_zones(region = "west-yorkshire") -zones_leeds = zones %>% - filter(lad_name == "Leeds") -zones_leeds %>% - select(bicycle:taxi_other) %>% - plot() -``` - -![](5-web_files/figure-gfm/getzones-1.png) - -``` r -?get_pct_routes_fast -``` - -Bonus (if you have time and a decent computer): download and import the -‘car.zip’ data from - -## Getting data from the web - -Read through Section -[8.2](https://geocompr.robinlovelace.net/read-write.html#retrieving-data) -and 8.3 of Geocomputation with R. - -Complete Excersises 4, 5, 6 and 7 of the chapter - -## Bonus 1: osmextract - -Take a read of the documentation for the -[`osmextract`](https://docs.ropensci.org/osmextract/index.html) package. - -Reproduce the examples - -Get all supermarkets in OSM for West Yorkshire - -Identify all cycleways in West Yorkshire and, using the stats19 data you -have already downloaded, identify all crashes that happened near them. 
- -## Bonus 2: Getting data from statistics.gov.uk - -Identify a region and zonal units of interest from - - -- Read them into R as an `sf` object - -- Join-on data from a non-geographic object - -- Add a data access section to your in progress portfolio - -## Bonus 3: find another geographic or transport dataset hosted online and write code to download it - -- Share the code on Teams or in a GitHub issue: - diff --git a/practicals/5-web_files/figure-gfm/loop_bad-1.png b/practicals/5-web_files/figure-gfm/loop_bad-1.png deleted file mode 100644 index 9321f1a..0000000 Binary files a/practicals/5-web_files/figure-gfm/loop_bad-1.png and /dev/null differ diff --git a/practicals/5-web_files/figure-gfm/loop_good-1.png b/practicals/5-web_files/figure-gfm/loop_good-1.png deleted file mode 100644 index 9321f1a..0000000 Binary files a/practicals/5-web_files/figure-gfm/loop_good-1.png and /dev/null differ diff --git a/practicals/5-web_files/figure-gfm/unnamed-chunk-5-2.png b/practicals/5-web_files/figure-gfm/unnamed-chunk-5-2.png deleted file mode 100644 index 06c2276..0000000 Binary files a/practicals/5-web_files/figure-gfm/unnamed-chunk-5-2.png and /dev/null differ diff --git a/practicals/5-web_files/figure-gfm/unnamed-chunk-6-2.png b/practicals/5-web_files/figure-gfm/unnamed-chunk-6-2.png deleted file mode 100644 index 06c2276..0000000 Binary files a/practicals/5-web_files/figure-gfm/unnamed-chunk-6-2.png and /dev/null differ diff --git a/practicals/5-web_files/figure-gfm/unnamed-chunk-8-1.png b/practicals/5-web_files/figure-gfm/unnamed-chunk-8-1.png deleted file mode 100644 index 28ee90d..0000000 Binary files a/practicals/5-web_files/figure-gfm/unnamed-chunk-8-1.png and /dev/null differ diff --git a/practicals/5-web_files/figure-gfm/unnamed-chunk-9-1.png b/practicals/5-web_files/figure-gfm/unnamed-chunk-9-1.png deleted file mode 100644 index 2e62032..0000000 Binary files a/practicals/5-web_files/figure-gfm/unnamed-chunk-9-1.png and /dev/null differ diff --git 
a/practicals/6-routing-homework.Rmd b/practicals/6-routing-homework.Rmd deleted file mode 100644 index 1311ca1..0000000 --- a/practicals/6-routing-homework.Rmd +++ /dev/null @@ -1,68 +0,0 @@ ---- -title: "Routing homework" -subtitle: '
Practical' -author: "Malcolm Morgan and Robin Lovelace" -date: 'University of Leeds `r # Sys.Date()()`
' -output: github_document -bibliography: - - ../tds.bib - - ../tds.bib ---- - -## Prior set-up and reading (before practical) - -We will use [ITS Go](https://github.com/ITSLeeds/go) to do an easy setup of your computer. - -```{r, eval=FALSE, message=FALSE, warning=FALSE} -source("https://git.io/JvGjF") -``` - -If that does not work the packages we will be using are: - -* sf -* tidyverse -* tmap -* pct -* stplanr -* dodgr -* opentripplanner -* igraph -* osmextract - -Make sure that you have installed these. - -Read the OpenTripPlanner package paper for information on routing [@morgan_opentripplanner_2019]. - - - -Note: for this practical to work you need to have installed a recent version of `stplanr` (at least version 0.8.7). -Check the version you have installed with the following command: - -```{r} -packageVersion("stplanr") -``` - -Install the latest CRAN version with the following commands: - -```{r, eval=FALSE} -install.packages("remotes") # install the remotes package -``` - -If the installation fails, install `terra` with the [following command](https://github.com/rspatial/terra/). - -```{r, eval=FALSE} -install.packages('terra', repos='https://rspatial.r-universe.dev') -``` - - -```{r} -remotes::install_cran("stplanr") # install the stplanr package if not up-to-date -``` - - - - - -## References - - diff --git a/practicals/6-routing-homework.md b/practicals/6-routing-homework.md deleted file mode 100644 index c35c4fb..0000000 --- a/practicals/6-routing-homework.md +++ /dev/null @@ -1,45 +0,0 @@ -Routing homework -================ -Malcolm Morgan and Robin Lovelace -University of Leeds -
- -## Prior set-up and reading (before practical) - -We will use [ITS Go](https://itsleeds.github.io/go/) to do an easy setup -of your computer. - -``` r -source("https://git.io/JvGjF") -``` - -If that does not work the packages we will be using are: - -- sf -- tidyverse -- tmap -- pct -- stplanr -- dodgr -- opentripplanner -- igraph -- osmextract - -Make sure that you have installed these. - -Read the OpenTripPlanner package paper for information on routing -(Morgan et al. 2019). - -## References - -
- -
- -Morgan, Malcolm, Marcus Young, Robin Lovelace, and Layik Hama. 2019. -“OpenTripPlanner for R.” *Journal of Open Source Software* 4 (44): 1926. -. - -
- -
diff --git a/practicals/6-routing.R b/practicals/6-routing.R deleted file mode 100644 index 25bddc5..0000000 --- a/practicals/6-routing.R +++ /dev/null @@ -1,221 +0,0 @@ -packageVersion("stplanr") -remotes::install_cran("stplanr") # install the stplanr package if not up-to-date - -library(sf) # Spatial data functions -library(tidyverse) # General data manipulation -library(stplanr) # General transport data functions -library(dodgr) # Local routing and network analysis -library(opentripplanner) # Connect to and use OpenTripPlanner -library(tmap) # Make maps -library(osmextract) # Download and import OpenStreetMap data -tmap_mode("plot") - - -## ----otpgui, echo = FALSE, fig.align='center', fig.cap="OTP Web GUI"--------------------------------- -# knitr::include_graphics("otp_screenshot.png") -knitr::include_graphics("https://github.com/ITSLeeds/TDS/blob/master/practicals/otp_screenshot.png?raw=true") - - -## ---- echo=FALSE, eval=FALSE------------------------------------------------------------------------- -## piggyback::pb_upload("otp_TDS.zip") -## piggyback::pb_download_url("otp_TDS.zip") -## # https://github.com/ITSLeeds/TDS/releases/download/0.20.1/otp_TDS.zip - - -## # java –Xmx10000M -d64 -jar "D:/OneDrive - University of Leeds/Data/opentripplanner/otp-1.5.0-shaded.jar" --router west-yorkshire --graphs "D:/OneDrive - University of Leeds/Data/opentripplanner/graphs" --server --port 8080 --securePort 8081 - -## sudo update-alternatives --config java - -## # java --version - -## java -version - -## java -Xmx10000M -d64 -jar "/home/robin/programs/otp1.5/otp_TDS/otp-1.5.0-shaded.jar" --router west-yorkshire --graphs "/home/robin/programs/otp1.5/otp_TDS/graphs" --server --port 8080 --securePort 8081 - -## # ip = "xx.x.218.83" - - -## ---- eval=TRUE, message=FALSE, warning=FALSE-------------------------------------------------------- -# ip = "localhost" # to run it on your computer (see final bonus exercise) -ip = "otp.saferactive.org" # an actual server -otpcon = 
otp_connect(hostname = ip, - port = 80, - router = "west-yorkshire") - - -## ---- message=FALSE---------------------------------------------------------------------------------- -u = "https://github.com/ITSLeeds/TDS/releases/download/22/NTEM_flow.geojson" -desire_lines = read_sf(u) -head(desire_lines) - - -## ---- message=FALSE---------------------------------------------------------------------------------- -u = "https://github.com/ITSLeeds/TDS/releases/download/22/NTEM_cents.geojson" -centroids = read_sf(u) -head(centroids) - - -## ---- message=FALSE, echo=TRUE----------------------------------------------------------------------- -tmap_mode("plot") #Change to view for interactive map -tm_shape(desire_lines) + - tm_lines(lwd = "all", col = "all", scale = 4, palette = "viridis") - - -## ---- message=FALSE, echo=FALSE---------------------------------------------------------------------- -tmap_mode("plot") #Change to view for interactive map -tm_shape(desire_lines) + - tm_lines(lwd = "rail", col = "rail", scale = 4, palette = "viridis", style = "jenks") - - -## ---- eval=TRUE, echo=FALSE-------------------------------------------------------------------------- -desire_top = desire_lines %>% - top_n(n = 3, wt = all) - - -## ---- message=FALSE---------------------------------------------------------------------------------- -routes_drive_top = route(l = desire_top, route_fun = otp_plan, otpcon = otpcon, mode = "CAR") - - -## ---- eval=FALSE, echo=FALSE------------------------------------------------------------------------- -## # Old way of doing it using zone centroids: -## fromPlace = centroids[match(desire_top$from, centroids$Zone_Code),] -## toPlace = centroids[match(desire_top$to, centroids$Zone_Code),] -## routes_drive_top = otp_plan(otpcon = otpcon, -## fromPlace = fromPlace, -## toPlace = toPlace, -## fromID = fromPlace$Zone_Code, -## toID = toPlace$Zone_Code, -## mode = "CAR") -## waldo::compare(routes_drive_top, routes_drive_top_new) - - -## 
---------------------------------------------------------------------------------------------------- -tmap_mode("plot") -tm_shape(routes_drive_top) + tm_lines() - - -## ---- message=FALSE, eval=TRUE----------------------------------------------------------------------- -isochrone = otp_isochrone(otpcon, fromPlace = c(-1.558655, 53.807870), - mode = c("BICYCLE","TRANSIT"), - maxWalkDistance = 3000) -isochrone$time = isochrone$time / 60 -tm_shape(isochrone) + - tm_fill("time", alpha = 0.6) - - -## ---- message=FALSE, eval=FALSE, echo=FALSE---------------------------------------------------------- -## routes_drive = route(l = desire_lines, route_fun = otp_plan, otpcon = otpcon, mode = "CAR") -## # fromPlace = centroids[match(desire_lines$from, centroids$Zone_Code),] -## # toPlace = centroids[match(desire_lines$to, centroids$Zone_Code),] -## # routes_drive = otp_plan(otpcon = otpcon, -## # fromPlace = fromPlace, -## # toPlace = toPlace, -## # fromID = fromPlace$Zone_Code, -## # toID = toPlace$Zone_Code, -## # mode = "CAR", -## # ncores = 20) -## -## sf::write_sf(routes_drive, "routes_drive.geojson", delete_dsn = TRUE) -## piggyback::pb_upload("routes_drive.geojson") -## -## date_time = lubridate::ymd_hms("2022-01-25 09:00:00") -## routes_transit = route( -## l = desire_lines, route_fun = otp_plan, otpcon = otpcon, -## mode = c("WALK","TRANSIT"), date_time = date_time, maxWalkDist = 2000 -## ) -## -## # routes_transit = otp_plan(otpcon = otpcon, -## # fromPlace = fromPlace, -## # toPlace = toPlace, -## # fromID = fromPlace$Zone_Code, -## # toID = toPlace$Zone_Code, -## # mode = c("WALK","TRANSIT"), -## # date_time = date_time, -## # ncores = 20, -## # maxWalkDistance = 2000) -## summary(st_is_valid(routes_transit)) -## routes_transit = st_make_valid(routes_transit) -## sf::write_sf(routes_transit, "routes_transit.geojson", delete_dsn = TRUE) -## piggyback::pb_upload("routes_transit.geojson") - - -## ---- message=FALSE, eval=TRUE, 
echo=TRUE------------------------------------------------------------ -u = "https://github.com/ITSLeeds/TDS/releases/download/22/routes_drive.geojson" -routes_drive = read_sf(u) -u = "https://github.com/ITSLeeds/TDS/releases/download/22/routes_transit.geojson" -routes_transit = read_sf(u) - - -## ---- message=FALSE, eval=TRUE, echo=FALSE----------------------------------------------------------- -n_driver = desire_lines %>% - st_drop_geometry() %>% - select(from, to, drive) - - -## ---- message=FALSE, eval=TRUE, echo=FALSE----------------------------------------------------------- -routes_drive = left_join(routes_drive, n_driver, by = c("fromPlace" = "from","toPlace" = "to")) - - -## ---- message=FALSE, eval=TRUE, echo=TRUE------------------------------------------------------------ -rnet_drive = overline(routes_drive, "drive") - - -## ---- message=FALSE, eval=TRUE, echo=FALSE----------------------------------------------------------- -tm_shape(rnet_drive) + - tm_lines(lwd = 2, col = "drive", style = "jenks", palette = "-viridis") - - -## ---- eval=TRUE, message=FALSE, echo=FALSE----------------------------------------------------------- -routes_transit = routes_transit %>% - filter(route_option == 1) %>% - select(fromPlace, toPlace, distance) - - -## ---- eval=TRUE, message=FALSE----------------------------------------------------------------------- -routes_transit_group = routes_transit %>% - dplyr::group_by(fromPlace, toPlace) %>% - dplyr::summarise(distance = sum(distance)) - - -## ---- eval=FALSE------------------------------------------------------------------------------------- -## routes_transit_group_ml = routes_transit_group[st_geometry_type(routes_transit_group) == "MULTILINESTRING", ] -## routes_transit_group = routes_transit_group[st_geometry_type(routes_transit_group) != "MULTILINESTRING", ] -## routes_transit_group_ml = st_line_merge(routes_transit_group_ml) -## routes_transit_group = rbind(routes_transit_group, routes_transit_group_ml) - - -## ---- 
message=FALSE, eval=TRUE, echo=FALSE----------------------------------------------------------- -tm_shape(routes_transit_group) + - tm_lines(lwd = 2, col = "black", palette = "-viridis") - - -## ---- eval=TRUE, warning=FALSE, message=FALSE, results='hide'---------------------------------------- -roads = oe_get("Isle of Wight", extra_tags = c("maxspeed","oneway")) -roads = roads[!is.na(roads$highway),] -road_types = c("primary","primary_link", - "secondary","secondary_link", - "tertiary", "tertiary_link", - "residential","unclassified") -roads = roads[roads$highway %in% road_types, ] -graph = weight_streetnet(roads) - - -## ---- message=TRUE, echo=TRUE------------------------------------------------------------------------ -estimate_centrality_time(graph) -centrality = dodgr_centrality(graph) - - -## ---- message=TRUE, echo=TRUE, results='hide'-------------------------------------------------------- -clear_dodgr_cache() -centrality_sf = dodgr_to_sf(centrality) - - -## ---- message=TRUE, echo=FALSE----------------------------------------------------------------------- -tm_shape(centrality_sf) + - tm_lines("centrality", - lwd = 3, - n = 8, - style = "fisher", - palette = "-viridis") - - diff --git a/practicals/6-routing.Rmd b/practicals/6-routing.Rmd deleted file mode 100644 index 6da0ab6..0000000 --- a/practicals/6-routing.Rmd +++ /dev/null @@ -1,194 +0,0 @@ ---- -title: "Routing" -subtitle: '
Practical' -author: "Malcolm Morgan" -date: 'University of Leeds, `r Sys.Date()`
' -output: github_document -bibliography: ../references.bib ---- - -## Setting Up (10 minutes) - -We will be using several R packages and will also need to get an API key for [cyclestreets](https://www.cyclestreets.net/api/apply/) - -Unfortunately, some of the most interesting packages for routing are not yet available on CRAN. In this practical we will introduce three CRAN packages [cyclestreets](https://cran.r-project.org/web/packages/cyclestreets/index.html), [dodgr](https://cran.r-project.org/web/packages/dodgr/index.html), and [igraph](https://cran.r-project.org/web/packages/igraph/index.html) and one GitHub package [transportAPI](https://github.com/ITSLeeds/transportAPI). - -There are also some bonus exercises using the [Open Trip Planner](https://github.com/ITSLeeds/opentripplanner) - -To install packages from GitHub you will need the `devtools` package. **Note:** GitHub packages have not been reviewed so install at your own risk. - -```{r, eval=FALSE} -# Install packages from CRAN (as required) -list.of.packages <- c("sf", "stplanr","cyclestreets","devtools","dodgr","igraph","usethis","tmap") -new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])] -if(length(new.packages)) install.packages(new.packages) -rm(list.of.packages, new.packages) - -# Install packages from GitHub -devtools::install_github("mem48/transportAPI") -devtools::install_github("ITSleeds/opentripplanner") # For the bonus exercises - -# Load packages -library(sf) -library(stplanr) -library(tmap) -library(cyclestreets) -library(transportAPI) -library(dodgr) - -``` - -Now you will need to add your CycleStreets API key to your R Environment - -```{r, eval=FALSE} -usethis::edit_r_environ() -# Add CYCLESTREETS=your_key_here as a new line in your .Renviron file -``` - -You may need to restart R for the changes to come into effect. - - -We will also need some sample data, for this practical we will be using data about the Isle of Wight. 
- -* Commuter flow data from the [PCT](https://github.com/npct/pct-outputs-regional-notR/raw/master/commute/msoa/isle-of-wight/od_attributes.csv) -* MSOA centroids from the [PCT](https://github.com/npct/pct-outputs-regional-notR/raw/master/commute/msoa/isle-of-wight/c.geojson) -* The Open Street Map for the Isle of Wight from [Geofabrik](http://download.geofabrik.de/europe/great-britain/england/isle-of-wight.html) - -```{r, eval=FALSE} -flow <- read.csv("https://github.com/npct/pct-outputs-regional-notR/raw/master/commute/msoa/isle-of-wight/od_attributes.csv", - stringsAsFactors = FALSE) -flow <- flow[flow$geo_code1 != flow$geo_code2,] -flow <- flow[flow$all > 600,] # Subset out the largest flows - -centroids <- st_read("https://github.com/npct/pct-outputs-regional-notR/raw/master/commute/msoa/isle-of-wight/c.geojson") -centroids <- centroids[,"geo_code"] - -roads <- st_read("http://download.geofabrik.de/europe/great-britain/england/isle-of-wight-latest.osm.pbf", layer = "lines") -roads <- roads[!is.na(roads$highway),] # Subset to just the roads -``` - -** Exercises ** - -Install and load the folloing packages: - -* `sf` -* `stplanr` -* `cyclestreets` -* `tmap` -* `transportAPI` -* `opentripplanner` - optional - -Add the CycleStreets API Key to your R Environment - -Add the TransportAPI API key to your R Environment (Optional) - -Download and load the example data - -## Basic Routing - -Let's start with finding a simple route from A to B. We will use two different routing services - -```{r, eval=FALSE} -from <- c(-1.155884, 50.72186) -to <- c(-1.173878, 50.72301) -r_cs <- cyclestreets::journey(from, to) -r_tapi <- transportAPI::tapi_journey(from, to, apitype = "public", base_url = "http://fcc.transportapi.com/") -tmap_mode("view") -qtm(r_cs) + - qtm(r_tapi) - -``` - -Notice that `cyclestreets` has returned 8 rows, one for each road on the journey. While TransportAPI has returned 4 rows one row representing a direct walk, the other three a walk, bus, walk route. 
Notice the `route_option` and `route_stage` columns. - -Let's suppose you want a single line for each route. - -```{r, eval=FALSE} -r_cs$routeID <- 1 -r_cs <- r_cs %>% - dplyr::group_by(routeID) %>% - dplyr::summarise(distances = sum(distances), - time = sum(time), - busynance = sum(time)) - -``` - -We now have a single row but instead of a `LINESTRING` wen now have a `MULTILINESTRING`, we can convert to a linestring by using `st_line_merge()`. Note how the different columns where summarised. - -```{r, eval=FALSE} -st_geometry_type(r_cs) -r_cs <- st_line_merge(r_cs) -st_geometry_type(r_cs) -``` - -** Exercise ** -Experiment with routing can you find out how to: - -* Route for driving and cycling using transportAPI -* Change the date and time of travel with transportAPI -* Find fast and quiet routes from cyclestreets - -Hint: Try using `?tapi_journey` to view the help files - -## Batch Routing - -One route is useful, may many routes is better! We will find the routes for the 8 most commuter desire lines on the Isle of Wight. First, we must turn the flow data into a set of start and end points. We will use the `stplanr` package. The `od2odf` function returns the start and end coordinates by combing the `flow` and `centroids` datasets by the shared `geo_code`. - -```{r, eval=FALSE} -flow2 <- stplanr::od2odf(flow[,c("geo_code1","geo_code2")], as(centroids,"Spatial")) -# Note this function does not currently work with SF, so as("Spatial") required -head(flow2) -``` - - -The `cyclestreets` package doe not have an inbuilt batch routing option so we must build a simple loop - -```{r, eval=FALSE} -routes_cs <- list() -for(i in 1:nrow(flow2)){ - r_cs_sub <- cyclestreets::journey(as.numeric(flow2[i,3:4]), as.numeric(flow2[i,5:6])) - r_cs_sub$routeID <- paste0(flow2$code_o[i]," ",flow2$code_d[i]) - routes_cs[[i]] <- r_cs_sub -} -``` - -This leaves us with a list of data.frames. Which we can combine using `do.call(rbind)`. **Note** for large lists this is slow. 
consider using `dplyr::bind_rows()` and rebuilding the geometry column. We can also group them into a single line for each route. - -```{r, eval=FALSE} -routes_cs <- do.call(rbind,routes_cs) -routes_cs <- routes_cs %>% - dplyr::group_by(routeID) %>% - dplyr::summarise(distances = sum(distances), - time = sum(time), - busynance = sum(time)) -qtm(routes_cs) -``` - -The `transportAPI` package has a builtin batch routing function, and will also accept an SF point input. - -```{r, eval=FALSE} -from <- dplyr::left_join(flow, centroids, by = c("geo_code1" = "geo_code")) -to <- dplyr::left_join(flow, centroids, by = c("geo_code2" = "geo_code")) -routes_tapi <- transportAPI::tapi_journey_batch(from$geometry, to$geometry, - from$geo_code1, to$geo_code2, - base_url = "http://fcc.transportapi.com/") - -``` - -**Exercises** - -Examine the different results produced by cyclestreets and transportAPI. - -* how would you compare travel times by bike an public transport? - -## Network Analysis - -We will now look to analyse the road network using `dodgr`. - -```{r, eval=FALSE} -streetnet <- dodgr::weight_streetnet(roads) -distances <- dodgr::dodgr_dists(streetnet, as.matrix(flow2[,3:4]), as.matrix(flow2[,5:6])) -colnames(distances) <- flow2$code_d -rownames(distances) <- flow2$code_o -distances -``` diff --git a/practicals/6-routing.md b/practicals/6-routing.md deleted file mode 100644 index de16e84..0000000 --- a/practicals/6-routing.md +++ /dev/null @@ -1,303 +0,0 @@ -Routing -================ -Malcolm Morgan -University of Leeds -
- -## Setting Up - -If you have not installed the package before hand. You can use [ITS -Go](https://itsleeds.github.io/go/) to do an easy setup of your computer - -``` r -source("https://git.io/JvGjF") -``` - -The packages we will be using are: - -``` r -library(sf) # Spatial data functions -library(tidyverse) # General data manipulation -library(stplanr) # General transport data functions -library(dodgr) # Local routing and network analysis -library(opentripplanner) # Connect to and use OpenTripPlanner -library(tmap) # Make maps -library(osmextract) # Download and import OpenStreetMap data -tmap_mode("plot") -``` - -## Using OpenTripPlanner to get routes - -We have setup the Multi-modal routing service OpenTripPlanner for West -Yorkshire. Try typing this URL — otp. saferactive. org (no spaces) — -during the session into your browser. You should see something like -this: - -OTP Web GUI - -**Exercise** - -1. Play with the web interface, finding different types of routes. What - strengths/limitations can you find? - -### Connecting to OpenTripPlanner - -To allow R to connect to the OpenTripPlanner server, we will use the -`opentripplanner` package and the function `otp_connect`. - -``` r -# ip = "localhost" # to run it on your computer (see final bonus exercise) -ip = "otp.saferactive.org" # an actual server -otpcon = otp_connect(hostname = ip, - port = 80, - router = "west-yorkshire") -``` - -If you have connected successfully, then you should get a message -“Router exists.” - -To get some routes, we will start by importing some data we have used -previously. Note that the data frame has 78 columns (only a few of which -are useful) and 1k rows: - -``` r -u = "https://github.com/ITSLeeds/TDS/releases/download/22/desire_lines_100.geojson" -desire_lines = read_sf(u) -dim(desire_lines) -``` - - ## [1] 101 78 - -**Exercise** - -2. 
Subset the and overwrite the `desire_lines` data frame with the `=` - assignment operator so that it only has the following columns: - geo_code1, geo_code2, all, bicycle, foot, car_driver, and geometry. - You can test the that the operation worked by executing the object - name, the result should look like that shown below. - -3. Use the `tmap` package to plot the `desire_lines`. Choose different - ways to visualise the data so you can understand local commuter - travel patterns. See example plot below. - -![](6-routing_files/figure-gfm/unnamed-chunk-11-1.png) - -This dataset has desire lines, but most routing packages need start and -endpoints, so we will extract the points from the lines using the -`stplanr::line2df` function. An then select the top 3 desire lines. - -**Exercise** - -4. Produce a data frame called `desire` which contains the coordinates - of the start and endpoints of the lines in `desire_lines` but not - the geometries. Hint `?stplanr::line2df` and `?dplyr::bind_cols` - -5. Subset out the top three desire lines by the total number of - commuters and create a new data frame called `desire_top`. Hint - `?dplyr::slice_max` - -``` r -desire_top = desire_lines %>% - top_n(n = 3, wt = car_driver) -``` - -6. Find the driving routes for `desire_top` and call them `routes_top` - using `opentripplanner::otp_plan` - -To find the routes for the first three desire lines use the following -command: - -``` r -routes_drive_top = route( - l = desire_top, - route_fun = otp_plan, - mode = "CAR", - otpcon = otpcon - ) -``` - -7. Plot `routes_drive_top` using the `tmap` package in interactive - mode. You should see something like the image below. - -``` r -tmap_mode("view") -``` - - ## tmap mode set to interactive viewing - -``` r -tm_shape(routes_drive_top) + tm_lines() -``` - -![](6-routing_files/figure-gfm/unnamed-chunk-14-1.png) - -``` r -tmap_mode("plot") -``` - - ## tmap mode set to plotting - -We can also get Isochrones from OTP. 
- -``` r -sf::sf_use_s2(FALSE) -isochrone = otp_isochrone(otpcon, fromPlace = c(-1.558655, 53.807870), - mode = c("BICYCLE","TRANSIT"), - maxWalkDistance = 3000) -isochrone$time = isochrone$time / 60 -tm_shape(isochrone) + - tm_fill("time", alpha = 0.6) -``` - -![](6-routing_files/figure-gfm/unnamed-chunk-15-1.png) - -To save overloading the server, we have pre-generated some extra routes. -Download these routes and load them into R. - -``` r -u = "https://github.com/ITSLeeds/TDS/releases/download/22/routes_drive.geojson" -routes_drive = read_sf(u) -u = "https://github.com/ITSLeeds/TDS/releases/download/22/routes_transit.geojson" -routes_transit = read_sf("transit_routes.gpkg") -``` - -## Route Networks - -Route networks (also called flow maps) show transport demand on -different parts of the road network. - -Now we have the number of commuters and their routes, we can produce a -route network map using `stplanr::overline`. - -``` r -rnet_drive <- overline(routes_drive, "car_driver") -``` - -**Exercise** 15. Make a route network for driving and plot it using the -`tmap` package. How is is different from just plotting the routes? - -![](6-routing_files/figure-gfm/unnamed-chunk-20-1.png) - -## Line Merging - -Notice that `routes_transit` has returned separate rows for each mode -(WALK, RAIL). Notice the `route_option` column shows that some routes -have multiple options. - -Let’s suppose you want a single line for each route. - -**Exercise** - -16. Filter the `routes_transit` to contain only one route option per - origin-destination pair. - -Now We will group the separate parts of the routes together. - -``` r -routes_transit_group = routes_transit %>% - dplyr::group_by(fromPlace, toPlace) %>% - dplyr::summarise(distance = sum(distance)) -``` - -We now have a single row, but instead of a `LINESTRING`, we now have a -mix of `MULTILINESTRING` and `LINESTRING`, we can convert to a -`LINESTRING` by using `st_line_merge()`. 
Note how the different columns -where summarised. - -First, we must separate out the `MULTILINESTRING` and `LINESTRING` - -``` r -routes_transit_group_ml = routes_transit_group[st_geometry_type(routes_transit_group) == "MULTILINESTRING", ] -routes_transit_group = routes_transit_group[st_geometry_type(routes_transit_group) != "MULTILINESTRING", ] -routes_transit_group_ml = st_line_merge(routes_transit_group_ml) -routes_transit_group = rbind(routes_transit_group, routes_transit_group_ml) -``` - -**Exercise** - -17. Plot the transit routes, what do you notice about them? - -![](6-routing_files/figure-gfm/unnamed-chunk-24-1.png) - -**Bonus Exercise**: - -18. Redo exercise 16 but make sure you always select the fastest option. - -## Network Analysis (dodgr) - -**Note** Some people have have problems running dodgr on Windows, if you -do follow these -[instructions](https://github.com/ITSLeeds/TDS/blob/master/practicals/dodgr-install.md). - -We will now analyse the road network using `dodgr`. Network analysis can -take a very long time on large areas. So we will use the example of the -[Isle of Wight](https://en.wikipedia.org/wiki/Isle_of_Wight), which is -ideal for transport studies as it is small, but has a full transport -system including a railway and the last commercial hovercraft service in -the world. - -First we need to download the roads network from the OpenStreetMap using -`osmextract::oe_get`. We will removed most of the paths and other -features and just focus on the main roads. Then use -`dodgr::weight_streetnet` to produce a graph of the road network. 
- -``` r -roads = oe_get("Isle of Wight", extra_tags = c("maxspeed","oneway")) -roads = roads[!is.na(roads$highway),] -road_types = c("residential","secondary","tertiary", - "unclassified","primary","primary_link", - "secondary_link","tertiary_link") -roads = roads[roads$highway %in% road_types, ] -graph = weight_streetnet(roads) -``` - -We will find the betweenness centrality of the Isle of Wight road -network. This can take a long time, so first lets check how long it will -take. - -``` r -estimate_centrality_time(graph) -``` - - ## Estimated time to calculate centrality for full graph is 00:00:04 - -``` r -centrality = dodgr_centrality(graph) -``` - -We can convert a `dodgr` graph back into a sf data frame for plotting -using `dodgr::dodgr_to_sf` - -``` r -clear_dodgr_cache() -centrality_sf = dodgr_to_sf(centrality) -``` - - ## old-style crs object detected; please recreate object with a recent sf::st_crs() - -**Exercise** - -19. Plot the centrality of the Isle of Wight road network. What can - centrality tell you about a road network? - -![](6-routing_files/figure-gfm/unnamed-chunk-28-1.png) - -20. Use `dodgr::dodgr_contract_graph` before calculating centrality, how - does this affect the computation time and the results? - -**Bonus Exercises** - -21. Work though the OpenTripPlanner vignettes [Getting - Started](https://docs.ropensci.org/opentripplanner/articles/opentripplanner.html) - and [Advanced - Features](https://docs.ropensci.org/opentripplanner/articles/advanced_features.html) - to run your own local trip planner for the Isle of Wight. - -**Note** To use OpenTripPlanner on your own computer requires Java 8. -See the -[Prerequisites](https://docs.ropensci.org/opentripplanner/articles/prerequisites.html) -for more details. If you can’t install Java 8 try some of the examples -in the vignettes but modify them for West Yorkshire. - -22. 
Read the `dodgr` - [vignettes](https://atfutures.github.io/dodgr/articles/index.html) diff --git a/practicals/6-routing_files/figure-gfm/unnamed-chunk-17-1.png b/practicals/6-routing_files/figure-gfm/unnamed-chunk-17-1.png deleted file mode 100644 index 8076c33..0000000 Binary files a/practicals/6-routing_files/figure-gfm/unnamed-chunk-17-1.png and /dev/null differ diff --git a/practicals/6-routing_files/figure-gfm/unnamed-chunk-18-1.png b/practicals/6-routing_files/figure-gfm/unnamed-chunk-18-1.png deleted file mode 100644 index b0ca1cc..0000000 Binary files a/practicals/6-routing_files/figure-gfm/unnamed-chunk-18-1.png and /dev/null differ diff --git a/practicals/7-viz.Rmd b/practicals/7-viz.Rmd deleted file mode 100644 index ea4a4da..0000000 --- a/practicals/7-viz.Rmd +++ /dev/null @@ -1,193 +0,0 @@ ---- -title: "Data Visualisation" -subtitle: '
Practical' -author: "Robin Lovelace" -date: 'University of Leeds `r # Sys.Date()()`
' -output: github_document ---- - -```{r, echo=FALSE, eval=FALSE} -# print pdf -#pagedown::chrome_print("practicals/7-viz.Rmd") -#pagedown::chrome_print("practicals/7-viz.html") -#browseURL("practicals/5-web.pdf") -#piggyback::pb_upload("practicals/7-viz.pdf") -#piggyback::pb_download_url("practicals/7-viz.pdf") -``` - -## Review of homework exercise: demo then individual Q&A - -```{r, message=FALSE} -library(tidyverse) -library(stplanr) -library(sf) -``` - -## Basics of data visualisation - -Data visualisation is a powerful way to help understand and communicate insights about the real world contained in data. - -Take this dataset: - -```{r,message=FALSE} -crashes_gb = stats19::get_stats19(year = 2018, type = "collision") -crashes_gb -``` - -Without some kind of analysis, it is hard to understand the meaning of the data. -Descriptive statistics can help, e.g. on the percentage of different crashes that happened by speed limit: - -```{r} -count_table = table(crashes_gb$speed_limit, crashes_gb$accident_severity) -count_table -count_table_proportions = count_table / rowSums(count_table) -round(count_table_proportions * 100, 1) -``` - -As expected, crashes that happened on roads with a higher speed limit have a higher fatality rate. -This can be visualised in a more appealing and intuitive way, e.g. as follows: - -```{r} -library(ggplot2) -ggplot(crashes_gb) + geom_bar(aes(accident_severity)) -``` - -```{r} -ggplot(crashes_gb, aes(speed_limit)) + geom_bar(aes(fill = accident_severity), position = "fill") -``` - -**Exercise**: Improve the plots by: - -1. Label the x-axis "Speed Limit (MPH)" -1. Adjust the x-axis to include a label for each column (e.g. 20, 30, 40, 50, 60, 70), Hint: `scale_x_continuous` -1. Label the y-axis "Percentage of all collisions" -1. Adjust the y-axis to be 0-100 rather than 0-1. Hint: `scales::percent_format()` -1. Label the legend "Collision Severity" -1. 
Change the colour palette - -```{r, eval=FALSE, echo=FALSE} -ggplot(crashes_gb, aes(speed_limit)) + - geom_bar(aes(fill = accident_severity), position = "fill", palette = "Blues") + - xlab("Speed Limit (MPH)") + - ylab("Percentage of all collisions") + - labs(fill="Collision Severity") + - scale_x_continuous(breaks=seq(20,70,10)) + - scale_y_continuous(labels = scales::percent_format(), limits = c(0, 1)) + - scale_fill_brewer(palette="Dark2") - -``` - - -**Exercise**: Read the documentation that appears when you run this line of code: - -``` -?facet_grid -``` - -Create a faceted plot with a different facet for each speed limit. - -```{r, echo=FALSE} -ggplot(crashes_gb) + geom_bar(aes(accident_severity)) + - facet_grid(~ speed_limit) -``` - -**Exercise**: Improve this plot by rotating the label text 90 degrees. Hint: `theme` & `element_text` - -```{r, echo=FALSE, eval=FALSE} -ggplot(crashes_gb) + geom_bar(aes(accident_severity)) + - facet_grid(~ speed_limit) + - theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) -``` - -**Exercise**: Adjust this plot to produce a facet for each severity and bar for each speed - -```{r, echo=FALSE, eval=FALSE} -ggplot(crashes_gb) + geom_bar(aes(speed_limit)) + - facet_grid(~ accident_severity) + - theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) -``` - - - -## Map based plots - -1. Create a map showing the number of people walking in the `stplanr` dataset `flowlines_sf` using: --base graphics (hint: use `plot()`) and --**tmap** (hint: use `tm_shape(flowlines_sf) + ...`, `palette = "viridis"` and other options shown in `tmaptools::palette_explorer()` give different colourschemes). - - Name two advantages of each approach - - Bonus: What three other mapping packages could be used to show the same data? 
- - -## Result: base graphics - -```{r, echo=FALSE, out.width="70%", fig.show='hold', message=FALSE} -library(stplanr) -lwd = flowlines_sf$All / mean(flowlines_sf$All) * 3 -plot(flowlines_sf["Taxi"], lwd = lwd) - -``` - -## Result: tmap - -```{r, message=FALSE, echo=FALSE} -library(tmap) -tmap_mode("plot") -tm_shape(flowlines_sf) + - tm_lines(col = "Taxi", lwd = "All", scale = 9, palette = "viridis") -``` - - - -## Exercises - -- Based on the `routes_fast_sf` data in the `stplanr` package, identify roads where investment in cycling is likely to be effective. - -Starting point: - -```{r} -nrow(flowlines_sf) -nrow(routes_fast_sf) -# routes_fast_sf = dplyr::inner_join(routes_fast_sf, flow) -routes_fast_sf = cbind(routes_fast_sf, flowlines_sf) -``` - - - -## sf results - -```{r, echo=FALSE} -plot(routes_fast_sf["Bicycle"], lwd = routes_fast_sf$All / 50) -``` - -***Exercise*** - -Create this map as an iterative plot using `tmap` - -```{r, echo=FALSE, eval=FALSE} -tmap_mode("view") -tm_shape(routes_fast_sf) + - tm_lines(col = "Bicycle", lwd = "All", scale = 9, palette = "RdYlBu") -``` - - -## Create a complex plot - -Choose an example from https://r-graph-gallery.com/best-r-chart-examples.html and try to recreate it. - -These example come with code you can copy, so it should be easy to create the example plot. Then try to understand what each step in the process is doing. Try commenting out specific lines of code or changing settings to make your own version of the plot. Can you make an even better plot? - -## Apply your new skills to your coursework data - -By now you should have identified some data to use in your coursework. Produce a plot that helps you understand that data. Take the oppertunity to get feeback on your analysis and plot design from the demonstrators. 
- - -## Exercises bonus (optional) - -- Work through the PCT training vignette at: https://itsleeds.github.io/pct/articles/pct_training.html -- Using data in the `pct` github package, estimate cycling potential in a city of your choice in the UK, and show the results. - - -## Homework - -- Work on your portfolios, adding a new visualisation of the data you will use. Pepare questions for the next session which is for coursework support. - diff --git a/practicals/7-viz.md b/practicals/7-viz.md deleted file mode 100644 index 14f8103..0000000 --- a/practicals/7-viz.md +++ /dev/null @@ -1,203 +0,0 @@ -Data Visualisation -================ -Robin Lovelace -University of Leeds -
- -## Review of homework exercise: demo then individual Q&A - -``` r -library(tidyverse) -library(stplanr) -library(sf) -``` - -## Basics of data visualisation - -Data visualisation is a powerful way to help understand and communicate -insights about the real world contained in data. - -Take this dataset: - -``` r -crashes_gb = stats19::get_stats19(year = 2018, type = "collision") -crashes_gb -``` - - ## # A tibble: 122,635 × 37 - ## accident_index accident_year accident_reference location_easting_osgr - ## - ## 1 2018010080971 2018 010080971 529150 - ## 2 2018010080973 2018 010080973 542020 - ## 3 2018010080974 2018 010080974 531720 - ## 4 2018010080981 2018 010080981 541450 - ## 5 2018010080982 2018 010080982 543580 - ## 6 2018010080983 2018 010080983 526060 - ## 7 2018010080986 2018 010080986 525050 - ## 8 2018010080987 2018 010080987 536710 - ## 9 2018010080990 2018 010080990 517110 - ## 10 2018010080992 2018 010080992 535450 - ## # ℹ 122,625 more rows - ## # ℹ 33 more variables: location_northing_osgr , longitude , - ## # latitude , police_force , accident_severity , - ## # number_of_vehicles , number_of_casualties , date , - ## # day_of_week , time