Merge pull request #317 from ayushpatnaikgit/0.3.1

Fix leftover bugs
xKDR · Dec 22, 2023 · 9da1fd1 · 9da1fd1
2 parents 253ead9 + 300192f
commit 9da1fd1
Show file tree

Hide file tree

Showing 9 changed files with 83 additions and 85 deletions.
diff --git a/docs/Project.toml b/docs/Project.toml
@@ -2,6 +2,7 @@
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 Survey = "c1a98b4d-6cd2-47ec-b9e9-69b59c35373c"
diff --git a/docs/src/api.md b/docs/src/api.md
@@ -17,7 +17,7 @@ JackknifeReplicates
 load_data
 bootweights
 jackknifeweights
-stderr
+Survey.standarderror
 mean
 total
 quantile
@@ -26,4 +26,6 @@ glm
 plot
 boxplot
 hist
+Survey.sturges
+Survey.freedman_diaconis
 ```
diff --git a/src/bootstrap.jl b/src/bootstrap.jl
@@ -61,7 +61,7 @@ function bootweights(design::SurveyDesign; replicates = 4000, rng = MersenneTwis
 end
 
 """
-stderr(x::Union{Symbol, Vector{Symbol}}, func::Function, design::ReplicateDesign{BootstrapReplicates}, args...; kwargs...)
+standarderror(x::Union{Symbol, Vector{Symbol}}, func::Function, design::ReplicateDesign{BootstrapReplicates}, args...; kwargs...)
 
 Compute the standard error of the estimated mean using the bootstrap method.
 
@@ -85,19 +85,18 @@ where above ``R`` is the number of replicate weights, ``\\theta_i`` is the estim
 
 # Examples
 
-```jldoctest; setup = :(using Survey, StatsBase, DataFrames; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights;)
+```jldoctest; setup = :(using Survey, StatsBase, DataFrames; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights)
+julia> my_mean(df::DataFrame, column, weights) = StatsBase.mean(df[!, column], StatsBase.weights(df[!, weights]));
 
-julia> mean(df::DataFrame, column, weights) = StatsBase.mean(df[!, column], StatsBase.weights(df[!, weights]));
-
-julia> stderr(:api00, mean, bclus1)
+julia> Survey.standarderror(:api00, my_mean, bclus1)
 1×2 DataFrame
  Row │ estimator  SE
      │ Float64    Float64
 ─────┼────────────────────
    1 │   644.169  23.4107
 ```
 """
-function stderr(x::Union{Symbol, Vector{Symbol}}, func::Function, design::ReplicateDesign{BootstrapReplicates}, args...; kwargs...)
+function standarderror(x::Union{Symbol, Vector{Symbol}}, func::Function, design::ReplicateDesign{BootstrapReplicates}, args...; kwargs...)
 
     # Compute the estimators
     θs = func(design.data, x, design.weights, args...; kwargs...)

diff --git a/src/jackknife.jl b/src/jackknife.jl
@@ -83,7 +83,7 @@ function jackknifeweights(design::SurveyDesign)
 end
 
 """
-stderr(x::Symbol, func::Function, design::ReplicateDesign{JackknifeReplicates})
+standarderror(x::Symbol, func::Function, design::ReplicateDesign{JackknifeReplicates})
 
 Compute standard error of column `x` for the given `func` using the Jackknife method. The formula to compute this variance is the following.
 
@@ -94,11 +94,10 @@ Compute standard error of column `x` for the given `func` using the Jackknife me
 Above, ``\\hat{\\theta}`` represents the estimator computed using the original weights, and ``\\hat{\\theta_{(hj)}}`` represents the estimator computed from the replicate weights obtained when PSU ``j`` from cluster ``h`` is removed.
 
 # Examples
-```jldoctest; setup = :(using Survey, StatsBase, DataFrames; apistrat = load_data("apistrat"); dstrat = SurveyDesign(apistrat; strata=:stype, weights=:pw); rstrat = jackknifeweights(dstrat);)
+```jldoctest; setup = :(using Survey, StatsBase, DataFrames; apistrat = load_data("apistrat"); dstrat = SurveyDesign(apistrat; strata=:stype, weights=:pw); rstrat = jackknifeweights(dstrat))
+julia> my_mean(df::DataFrame, column, weights) = StatsBase.mean(df[!, column], StatsBase.weights(df[!, weights]));
 
-julia> mean(df::DataFrame, column, weights) = StatsBase.mean(df[!, column], StatsBase.weights(df[!, weights]));
-
-julia> stderr(:api00, mean, rstrat)
+julia> Survey.standarderror(:api00, my_mean, rstrat)
 1×2 DataFrame
  Row │ estimator  SE
      │ Float64    Float64
@@ -108,7 +107,7 @@ julia> stderr(:api00, mean, rstrat)
 # Reference
 pg 380-382, Section 9.3.2 Jackknife - Sharon Lohr, Sampling Design and Analysis (2010)
 """
-function stderr(x::Union{Symbol, Vector{Symbol}}, func::Function, design::ReplicateDesign{JackknifeReplicates}, args...; kwargs...)
+function standarderror(x::Union{Symbol, Vector{Symbol}}, func::Function, design::ReplicateDesign{JackknifeReplicates}, args...; kwargs...)
 
     df = design.data
     stratified_gdf = groupby(df, design.strata)

diff --git a/src/mean.jl b/src/mean.jl
@@ -44,8 +44,7 @@ Compute the standard error of the estimated mean using replicate weights.
 
 # Examples
 
-```jldoctest; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights;)
-
+```jldoctest; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights)
 julia> mean(:api00, bclus1)
 1×2 DataFrame
  Row │ mean     SE
@@ -62,7 +61,7 @@ function mean(x::Symbol, design::ReplicateDesign)
     end
 
     # Calculate the mean and standard error
-    df = Survey.stderr(x, inner_mean, design)
+    df = Survey.standarderror(x, inner_mean, design)
 
     rename!(df, :estimator => :mean)
 
@@ -108,40 +107,40 @@ Estimate means of domains.
 ```jldoctest meanlabel; setup = :(apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights)
 julia> mean(:api00, :cname, dclus1)
 11×2 DataFrame
- Row │ cname        mean    
-     │ String15     Float64 
+ Row │ mean     cname
+     │ Float64  String
 ─────┼──────────────────────
-   1 │ Alameda      669.0
-   2 │ Fresno       472.0
-   3 │ Kern         452.5
-   4 │ Los Angeles  647.267
-   5 │ Mendocino    623.25
-   6 │ Merced       519.25
-   7 │ Orange       710.563
-   8 │ Plumas       709.556
-   9 │ San Diego    659.436
-  10 │ San Joaquin  551.189
-  11 │ Santa Clara  732.077
+   1 │ 669.0    Alameda
+   2 │ 472.0    Fresno
+   3 │ 452.5    Kern
+   4 │ 647.267  Los Angeles
+   5 │ 623.25   Mendocino
+   6 │ 519.25   Merced
+   7 │ 710.563  Orange
+   8 │ 709.556  Plumas
+   9 │ 659.436  San Diego
+  10 │ 551.189  San Joaquin
+  11 │ 732.077  Santa Clara
 ```
 Use the replicate design to compute standard errors of the estimated means. 
 
 ```jldoctest meanlabel
 julia> mean(:api00, :cname, bclus1)
 11×3 DataFrame
- Row │ cname        mean     SE           
-     │ String15     Float64  Float64      
+ Row │ mean     SE            cname
+     │ Float64  Float64       String
 ─────┼────────────────────────────────────
-   1 │ Santa Clara  732.077  58.2169
-   2 │ San Diego    659.436   2.66703
-   3 │ Merced       519.25    2.28936e-15
-   4 │ Los Angeles  647.267  47.6233
-   5 │ Orange       710.563   2.19826e-13
-   6 │ Fresno       472.0     1.13687e-13
-   7 │ Plumas       709.556   1.26058e-13
-   8 │ Alameda      669.0     1.27527e-13
-   9 │ San Joaquin  551.189   2.1791e-13
-  10 │ Kern         452.5     0.0
-  11 │ Mendocino    623.25    1.09545e-13
+   1 │ 732.077  58.2169       Santa Clara
+   2 │ 659.436   2.66703      San Diego
+   3 │ 519.25    2.28936e-15  Merced
+   4 │ 647.267  47.6233       Los Angeles
+   5 │ 710.563   2.19826e-13  Orange
+   6 │ 472.0     1.13687e-13  Fresno
+   7 │ 709.556   1.26058e-13  Plumas
+   8 │ 669.0     1.27527e-13  Alameda
+   9 │ 551.189   2.18162e-13  San Joaquin
+  10 │ 452.5     0.0          Kern
+  11 │ 623.25    1.09545e-13  Mendocino
 ```
 """
 function mean(x::Symbol, domain, design::AbstractSurveyDesign)

diff --git a/src/quantile.jl b/src/quantile.jl
@@ -48,8 +48,7 @@ Compute the standard error of the estimated quantile using replicate weights.
 
 # Examples
 
-```jldoctest; setup = :(using Survey, StatsBase; apisrs = load_data("apisrs"); srs = SurveyDesign(apisrs; weights=:pw); bsrs = srs |> bootweights;)
-
+```jldoctest; setup = :(using Survey, StatsBase; apisrs = load_data("apisrs"); srs = SurveyDesign(apisrs; weights=:pw); bsrs = srs |> bootweights)
 julia> quantile(:api00, bsrs, 0.5)
 1×2 DataFrame
  Row │ 0.5th percentile  SE
@@ -66,7 +65,7 @@ function quantile(x::Symbol, design::ReplicateDesign, p::Real; kwargs...)
     end
 
     # Calculate the quantile and standard error
-    df = stderr(x, inner_quantile, design)
+    df = standarderror(x, inner_quantile, design)
 
     rename!(df, :estimator => string(p) * "th percentile")
 
@@ -138,11 +137,11 @@ quantile(var, domain, design)
 
 Estimate a quantile of domains.
 
-```jldoctest meanlabel; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights;)
+```jldoctest meanlabel; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights)
 julia> quantile(:api00, :cname, dclus1, 0.5)
 11×2 DataFrame
- Row │ 0.5th percentile  cname       
-     │ Float64           String15    
+ Row │ 0.5th percentile  cname
+     │ Float64           String
 ─────┼───────────────────────────────
    1 │            669.0  Alameda
    2 │            474.5  Fresno

diff --git a/src/ratio.jl b/src/ratio.jl
@@ -8,7 +8,7 @@ julia> apiclus1 = load_data("apiclus1");
 
 julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw);
 
-julia> ratio(:api00, :enroll, dclus1)
+julia> ratio([:api00, :enroll], dclus1)
 1×1 DataFrame
  Row │ ratio
      │ Float64
@@ -40,7 +40,6 @@ Compute the standard error of the ratio using replicate weights.
 # Examples
 
 ```jldoctest; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = bootweights(dclus1);)
-
 julia> ratio([:api00, :api99], bclus1)
 1×2 DataFrame
  Row │ estimator  SE         
@@ -58,20 +57,20 @@ function ratio(x::Vector{Symbol}, design::ReplicateDesign)
         return sum(df[!, columns[1]], StatsBase.weights(df[!, weights_column])) / sum(df[!, columns[2]], StatsBase.weights(df[!, weights_column]))
     end
 
-    # Calculate the standard error using the `stderr` function with the inner function
-    return stderr([variable_num, variable_den], inner_ratio, design)
+    # Calculate the standard error using the `standarderror` function with the inner function
+    return standarderror([variable_num, variable_den], inner_ratio, design)
 end
 
 """
     ratio(var, domain, design)
 
 Estimate ratios of domains.
 
-```jldoctest ratiolabel; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights;)
+```jldoctest ratiolabel; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights)
 julia> ratio([:api00, :api99], :cname, dclus1)
 11×2 DataFrame
- Row │ ratio    cname       
-     │ Float64  String15    
+ Row │ ratio    cname
+     │ Float64  String
 ─────┼──────────────────────
    1 │ 1.09852  Alameda
    2 │ 1.17779  Fresno

diff --git a/src/reg.jl b/src/reg.jl
@@ -43,5 +43,5 @@ function glm(formula::FormulaTerm, design::ReplicateDesign, args...; kwargs...)
     end
 
     # Compute standard error of coefficients
-    stderr(columns, inner_glm, design, args...; kwargs...)
+    standarderror(columns, inner_glm, design, args...; kwargs...)
   end
diff --git a/src/total.jl b/src/total.jl
@@ -35,7 +35,7 @@ Compute the standard error of the estimated total using replicate weights.
 
 # Examples
 
-```jldoctest; setup = :(using Survey; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights;)
+```jldoctest; setup = :(using Survey; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights)
 julia> total(:api00, bclus1)
 1×2 DataFrame
  Row │ total      SE
@@ -52,7 +52,7 @@ function total(x::Symbol, design::ReplicateDesign)
     end
 
     # Calculate the total and standard error
-    df = stderr(x, inner_total, design)
+    df = standarderror(x, inner_total, design)
 
     rename!(df, :estimator => :total)
 
@@ -95,43 +95,43 @@ end
 
 Estimate population totals of domains.
 
-```jldoctest totallabel; setup = :(using Survey; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights;)
+```jldoctest totallabel; setup = :(using Survey; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights)
 julia> total(:api00, :cname, dclus1)
 11×2 DataFrame
- Row │ cname        total          
-     │ String15     Float64        
+ Row │ total           cname
+     │ Float64         String
 ─────┼─────────────────────────────
-   1 │ Alameda      249080.0
-   2 │ Fresno        63903.1
-   3 │ Kern          30631.5
-   4 │ Los Angeles       3.2862e5
-   5 │ Mendocino     84380.6
-   6 │ Merced        70300.2
-   7 │ Orange            3.84807e5
-   8 │ Plumas            2.16147e5
-   9 │ San Diego         1.2276e6
-  10 │ San Joaquin       6.90276e5
-  11 │ Santa Clara       6.44244e5
+   1 │ 249080.0        Alameda
+   2 │  63903.1        Fresno
+   3 │  30631.5        Kern
+   4 │      3.2862e5   Los Angeles
+   5 │  84380.6        Mendocino
+   6 │  70300.2        Merced
+   7 │      3.84807e5  Orange
+   8 │      2.16147e5  Plumas
+   9 │      1.2276e6   San Diego
+  10 │      6.90276e5  San Joaquin
+  11 │      6.44244e5  Santa Clara
 ```
 Use the replicate design to compute standard errors of the estimated totals. 
 
 ```jldoctest totallabel
 julia> total(:api00, :cname, bclus1)
 11×3 DataFrame
- Row │ cname        total           SE            
-     │ String15     Float64         Float64       
+ Row │ total           SE             cname
+     │ Float64         Float64        String
 ─────┼────────────────────────────────────────────
-   1 │ Santa Clara       6.44244e5      4.2273e5
-   2 │ San Diego         1.2276e6       8.62727e5
-   3 │ Merced        70300.2        71336.3
-   4 │ Los Angeles       3.2862e5       2.93936e5
-   5 │ Orange            3.84807e5      3.88014e5
-   6 │ Fresno        63903.1        64781.7
-   7 │ Plumas            2.16147e5      2.12089e5
-   8 │ Alameda      249080.0            2.49228e5
-   9 │ San Joaquin       6.90276e5      6.81604e5
-  10 │ Kern          30631.5        30870.3
-  11 │ Mendocino     84380.6        80215.9
+   1 │      6.44244e5      4.2273e5   Santa Clara
+   2 │      1.2276e6       8.62727e5  San Diego
+   3 │  70300.2        71336.3        Merced
+   4 │      3.2862e5       2.93936e5  Los Angeles
+   5 │      3.84807e5      3.88014e5  Orange
+   6 │  63903.1        64781.7        Fresno
+   7 │      2.16147e5      2.12089e5  Plumas
+   8 │ 249080.0            2.49228e5  Alameda
+   9 │      6.90276e5      6.81604e5  San Joaquin
+  10 │  30631.5        30870.3        Kern
+  11 │  84380.6        80215.9        Mendocino
 ```
 """
 function total(x::Symbol, domain, design::AbstractSurveyDesign)