Merge pull request #63 from philmikejones/docs

merge pkgdown docs
philmikejones · Oct 10, 2017 · 7e2a4b0 · 7e2a4b0
2 parents fbdc847 + aa1cd1e
commit 7e2a4b0
Show file tree

Hide file tree

Showing 30 changed files with 2,425 additions and 117 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -10,3 +10,5 @@ data-raw/
 ^CONDUCT\.md$
 inst/
 revdep/
+docs/
+^docs$
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,11 +1,12 @@
 Package: rakeR
 Title: Easy Spatial Microsimulation (Raking) in R
-Version: 0.2.0.9000
-Date: 2017-06-30
+Version: 0.2.1
+Date: 2017-10-10
 Authors@R: c(
     person("Phil Mike", "Jones", 
       email = "[email protected]",
-      role = c("aut", "cre")),
+      role = c("aut", "cre"),
+      comment = c(ORCID = "0000-0001-5173-3245")),
     person("Robin", "Lovelace", role = "aut",
       comment = "Many functions are based on code by Robin Lovelace and Morgane Dumont"),
     person("Morgane", "Dumont", role = "aut",

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,10 @@
-v 0.2.0.9000
-============
+v 0.2.1
+=======
+
+Patch release:
+
+* Fixes to examples and tests to use correct variable labels and names (thanks Derrick Atherton for pointing this out).
+* Fix version number in NEWS
 
 v 0.2.0
 =======

diff --git a/R/prep_functions.R b/R/prep_functions.R
@@ -18,11 +18,11 @@
 #'
 #' @examples
 #' cons <- data.frame(
-#' "zone"  = letters[1:3],
-#' "a0_49" = c(8, 2, 7),
-#' "a_gt50" = c(4, 8, 4),
-#' "f"    = c(6, 6, 8),
-#' "m"    = c(6, 4, 3)
+#' "zone"      = letters[1:3],
+#' "age_0_49"  = c(8, 2, 7),
+#' "age_gt_50" = c(4, 8, 4),
+#' "sex_f"     = c(6, 6, 8),
+#' "sex_m"     = c(6, 4, 3)
 #' )
 #' check_constraint(cons, 3)  # no errors
 check_constraint <- function(constraint_var, num_zones) {

diff --git a/R/rake_functions.R b/R/rake_functions.R
@@ -20,8 +20,8 @@
 #'
 #' It is essential that the levels in each \code{inds} constraint (i.e. column)
 #' match exactly with the column names in \code{cons}. In the example below see
-#' how the column names in cons (\code{'a0_49', 'f', ...}) match exactly the
-#' levels in \code{inds} variables.
+#' how the column names in cons (\code{'age_0_49', 'sex_f', ...}) match exactly
+#' the levels in \code{inds} variables.
 #'
 #' The columns in \code{cons} must be in alphabetical order because these are
 #' created alphabetically when they are 'spread' in the individual--level data.
@@ -46,17 +46,17 @@
 #' @examples
 #' # SimpleWorld
 #' cons <- data.frame(
-#' "zone"   = letters[1:3],
-#' "a0_49"  = c(8, 2, 7),
-#' "a_gt50" = c(4, 8, 4),
-#' "f"      = c(6, 6, 8),
-#' "m"      = c(6, 4, 3),
+#' "zone"      = letters[1:3],
+#' "age_0_49"  = c(8, 2, 7),
+#' "age_gt_50" = c(4, 8, 4),
+#' "sex_f"     = c(6, 6, 8),
+#' "sex_m"     = c(6, 4, 3),
 #' stringsAsFactors = FALSE
 #' )
 #' inds <- data.frame(
 #' "id"     = LETTERS[1:5],
-#' "age"    = c("a_gt50", "a_gt50", "a0_49", "a_gt50", "a0_49"),
-#' "sex"    = c("m", "m", "m", "f", "f"),
+#' "age"    = c("age_gt_50", "age_gt_50", "age_0_49", "age_gt_50", "age_0_49"),
+#' "sex"    = c("sex_m", "sex_m", "sex_m", "sex_f", "sex_f"),
 #' "income" = c(2868, 2474, 2231, 3152, 2473),
 #' stringsAsFactors = FALSE
 #' )
@@ -392,26 +392,25 @@ extract_weights <- function(weights, inds, id) {
 #'
 #' @examples
 #' cons <- data.frame(
-#'   "zone"   = letters[1:3],
-#'   "a0_49"  = c(8, 2, 7),
-#'   "a_gt50" = c(4, 8, 4),
-#'   "f"      = c(6, 6, 8),
-#'   "m"      = c(6, 4, 3),
+#'   "zone"      = letters[1:3],
+#'   "age_0_49"  = c(8, 2, 7),
+#'   "age_gt_50" = c(4, 8, 4),
+#'   "sex_f"     = c(6, 6, 8),
+#'   "sex_m"     = c(6, 4, 3),
 #'   stringsAsFactors = FALSE
 #' )
 #'
 #' inds <- data.frame(
 #'   "id"     = LETTERS[1:5],
-#'   "age"    = c("a_gt50", "a_gt50", "a0_49", "a_gt50", "a0_49"),
-#'   "sex"    = c("m", "m", "m", "f", "f"),
+#'   "age"    = c("age_gt_50", "age_gt_50", "age_0_49", "age_gt_50", "age_0_49"),
+#'   "sex"    = c("sex_m", "sex_m", "sex_m", "sex_f", "sex_f"),
 #'   "income" = c(2868, 2474, 2231, 3152, 2473),
 #'   stringsAsFactors = FALSE
 #' )
 #' vars <- c("age", "sex")
 #'
 #' weights     <- weight(cons = cons, inds = inds, vars = vars)
 #' weights_int <- integerise(weights, inds = inds)
-#' print(weights_int)
 integerise <- function(weights, inds, method = "trs", seed = 42) {
 
   # Ensures the output of the function is reproducible (uses sample())

diff --git a/README.Rmd b/README.Rmd
@@ -72,17 +72,17 @@ below) are only used as output, not as part of the constraint process.
 ```{r data}
 cons <- data.frame(
   "zone"   = letters[1:3],
-  "a0_49"  = c(8, 2, 7),
-  "a_gt50" = c(4, 8, 4),
-  "f"      = c(6, 6, 8),
-  "m"      = c(6, 4, 3),
+  "age_0_49"  = c(8, 2, 7),
+  "age_gt_50" = c(4, 8, 4),
+  "sex_f"      = c(6, 6, 8),
+  "sex_m"      = c(6, 4, 3),
   stringsAsFactors = FALSE
 )
 
 inds <- data.frame(
   "id"     = LETTERS[1:5],
-  "age"    = c("a_gt50", "a_gt50", "a0_49", "a_gt50", "a0_49"),
-  "sex"    = c("m", "m", "m", "f", "f"),
+  "age"    = c("age_gt_50", "age_gt_50", "age_0_49", "age_gt_50", "age_0_49"),
+  "sex"    = c("sex_m", "sex_m", "sex_m", "sex_f", "sex_f"),
   "income" = c(2868, 2474, 2231, 3152, 2473)
 )
 
@@ -91,7 +91,7 @@ vars <- c("age", "sex")
 
 It is _essential_ that the unique levels in the constraint variables in the 
 `inds` data set match the variable names in the `cons` data set.
-For example, `a0_49` and `a_gt50` are variable names in `cons`.
+For example, `age_0_49` and `age_gt_50` are variable names in `cons`.
 The unique levels of the `age` variable in `inds` precisely match these:
 
 ```{r labels-test}

diff --git a/README.md b/README.md
@@ -51,24 +51,24 @@ In addition, supply a character vector with the names of the constraint variable
 ``` r
 cons <- data.frame(
   "zone"   = letters[1:3],
-  "a0_49"  = c(8, 2, 7),
-  "a_gt50" = c(4, 8, 4),
-  "f"      = c(6, 6, 8),
-  "m"      = c(6, 4, 3),
+  "age_0_49"  = c(8, 2, 7),
+  "age_gt_50" = c(4, 8, 4),
+  "sex_f"      = c(6, 6, 8),
+  "sex_m"      = c(6, 4, 3),
   stringsAsFactors = FALSE
 )
 
 inds <- data.frame(
   "id"     = LETTERS[1:5],
-  "age"    = c("a_gt50", "a_gt50", "a0_49", "a_gt50", "a0_49"),
-  "sex"    = c("m", "m", "m", "f", "f"),
+  "age"    = c("age_gt_50", "age_gt_50", "age_0_49", "age_gt_50", "age_0_49"),
+  "sex"    = c("sex_m", "sex_m", "sex_m", "sex_f", "sex_f"),
   "income" = c(2868, 2474, 2231, 3152, 2473)
 )
 
 vars <- c("age", "sex")
 ```
 
-It is *essential* that the unique levels in the constraint variables in the `inds` data set match the variables names in the `cons` data set. For example, `a0_49` and `a_gt50` are variable names in `cons`. The unique levels of the `age` variable in `inds` precisely match these:
+It is *essential* that the unique levels in the constraint variables in the `inds` data set match the variable names in the `cons` data set. For example, `age_0_49` and `age_gt_50` are variable names in `cons`. The unique levels of the `age` variable in `inds` precisely match these:
 
 ``` r
 all.equal(
@@ -110,13 +110,13 @@ The raw weights tell you how frequently each individual (`A`-`E`) should appear
 ``` r
 int_weights <- integerise(weights, inds = inds)
 int_weights[1:6, ]
-#>     id    age sex income zone
-#> 1    A a_gt50   m   2868    a
-#> 1.1  A a_gt50   m   2868    a
-#> 2    B a_gt50   m   2474    a
-#> 3    C  a0_49   m   2231    a
-#> 3.1  C  a0_49   m   2231    a
-#> 3.2  C  a0_49   m   2231    a
+#>     id       age   sex income zone
+#> 1    A age_gt_50 sex_m   2868    a
+#> 1.1  A age_gt_50 sex_m   2868    a
+#> 2    B age_gt_50 sex_m   2474    a
+#> 3    C  age_0_49 sex_m   2231    a
+#> 3.1  C  age_0_49 sex_m   2231    a
+#> 3.2  C  age_0_49 sex_m   2231    a
 ```
 
 `integerise()` returns one row per case, and the number of rows will match the known population (taken from `cons`).
@@ -134,10 +134,10 @@ inds$income <- cut(inds$income, breaks = 2, include.lowest = TRUE,
 
 ext_weights <- extract(weights, inds = inds, id = "id")
 ext_weights
-#>   code total a0_49 a_gt50 f m     high      low
-#> 1    a    12     8      4 6 6 2.772002 9.227998
-#> 2    b    10     2      8 6 4 6.274917 3.725083
-#> 3    c    11     7      4 8 3 3.274917 7.725083
+#>   code total age_0_49 age_gt_50 sex_f sex_m     high      low
+#> 1    a    12        8         4     6     6 2.772002 9.227998
+#> 2    b    10        2         8     6     4 6.274917 3.725083
+#> 3    c    11        7         4     8     3 3.274917 7.725083
 ```
 
 `extract()` returns one row per zone, and the total of each category (for example female and male, or high and low income) will match the known population.
@@ -168,22 +168,22 @@ Details of these context-specific arguments can be found in the respective docum
 rake_int <- rake(cons, inds, vars, output = "integer",
                  method = "trs", seed = 42)
 rake_int[1:6, ]
-#>     id    age sex income zone
-#> 1    A a_gt50   m   high    a
-#> 1.1  A a_gt50   m   high    a
-#> 2    B a_gt50   m    low    a
-#> 3    C  a0_49   m    low    a
-#> 3.1  C  a0_49   m    low    a
-#> 3.2  C  a0_49   m    low    a
+#>     id       age   sex income zone
+#> 1    A age_gt_50 sex_m   high    a
+#> 1.1  A age_gt_50 sex_m   high    a
+#> 2    B age_gt_50 sex_m    low    a
+#> 3    C  age_0_49 sex_m    low    a
+#> 3.1  C  age_0_49 sex_m    low    a
+#> 3.2  C  age_0_49 sex_m    low    a
 ```
 
 ``` r
 rake_frac <- rake(cons, inds, vars, output = "fraction", id = "id")
 rake_frac
-#>   code total a0_49 a_gt50 f m     high      low
-#> 1    a    12     8      4 6 6 2.772002 9.227998
-#> 2    b    10     2      8 6 4 6.274917 3.725083
-#> 3    c    11     7      4 8 3 3.274917 7.725083
+#>   code total age_0_49 age_gt_50 sex_f sex_m     high      low
+#> 1    a    12        8         4     6     6 2.772002 9.227998
+#> 2    b    10        2         8     6     4 6.274917 3.725083
+#> 3    c    11        7         4     8     3 3.274917 7.725083
 ```
 
 Contributions

diff --git a/cran-comments.md b/cran-comments.md
@@ -1,13 +1,14 @@
-## R CMD check results
-* There were no ERRORs or WARNINGs
-* One NOTE refers to the spelling of 'microsimulation' (which is correct).
+## Test environments
+* Local Ubuntu Linux (16.04 Xenial) 64-bit, R 3.4.2
+* Ubuntu Linux (14.04.5 Trusty) on Travis-CI, R 3.4.1
+* win-builder: R-release v3.4.2 (2017-09-28)
+* win-builder: R-devel (unstable) (2017-09-12 r73242)
 
 
-## Test environments
-* Local Ubuntu Linux (16.06 Xenial) 64-bit, R 3.4.0
-* Ubuntu Linux (12.04.5 Precise) on Travis-CI, R 3.4.0
-* win-builder: R-release v3.4.1 (2017-06-30)
-* win-builder: R-devel (unstable) (2017-06-29 r72864)
+## R CMD check results
+* Local:     no ERRORs, WARNINGs, or NOTEs
+* R-release: 1 NOTE - spelling of 'microsimulation' (which is correct)
+* R-devel:   1 NOTE - development version
 
 
 ## Downstream dependencies

diff --git a/data-raw/prep-cakemap.R b/data-raw/prep-cakemap.R
@@ -31,9 +31,9 @@ inds$Car <- factor(inds$Car, levels = 1:2,
                    labels = c("car_yes", "car_no"))
 
 inds$ageband4[inds$Sex == "male"] <-
-  paste0("m", inds$ageband4[inds$Sex == "male"])
+  paste0("sex_m", inds$ageband4[inds$Sex == "male"])
 inds$ageband4[inds$Sex == "female"] <-
-  paste0("f", inds$ageband4[inds$Sex == "female"])
+  paste0("sex_f", inds$ageband4[inds$Sex == "female"])
 inds <- inds[, -3]
 
 readr::write_csv(inds, path = "tests/cakemap_inds.csv")