diff --git a/docs/Project.toml b/docs/Project.toml index f11e820..d40c66f 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -2,6 +2,7 @@ CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Survey = "c1a98b4d-6cd2-47ec-b9e9-69b59c35373c" diff --git a/docs/src/api.md b/docs/src/api.md index 729f90f..01c7451 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -17,7 +17,7 @@ JackknifeReplicates load_data bootweights jackknifeweights -stderr +Survey.standarderror mean total quantile @@ -26,4 +26,6 @@ glm plot boxplot hist +Survey.sturges +Survey.freedman_diaconis ``` diff --git a/src/bootstrap.jl b/src/bootstrap.jl index 956c79b..81b82bf 100644 --- a/src/bootstrap.jl +++ b/src/bootstrap.jl @@ -61,7 +61,7 @@ function bootweights(design::SurveyDesign; replicates = 4000, rng = MersenneTwis end """ -stderr(x::Union{Symbol, Vector{Symbol}}, func::Function, design::ReplicateDesign{BootstrapReplicates}, args...; kwargs...) +standarderror(x::Union{Symbol, Vector{Symbol}}, func::Function, design::ReplicateDesign{BootstrapReplicates}, args...; kwargs...) Compute the standard error of the estimated mean using the bootstrap method. @@ -85,11 +85,10 @@ where above ``R`` is the number of replicate weights, ``\\theta_i`` is the estim # Examples -```jldoctest; setup = :(using Survey, StatsBase, DataFrames; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights;) +```jldoctest; setup = :(using Survey, StatsBase, DataFrames; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights) +julia> my_mean(df::DataFrame, column, weights) = StatsBase.mean(df[!, column], StatsBase.weights(df[!, weights])); -julia> mean(df::DataFrame, column, weights) = StatsBase.mean(df[!, column], StatsBase.weights(df[!, weights])); - -julia> stderr(:api00, mean, bclus1) +julia> Survey.standarderror(:api00, my_mean, bclus1) 1×2 DataFrame Row │ estimator SE │ Float64 Float64 @@ -97,7 +96,7 @@ julia> stderr(:api00, mean, bclus1) 1 │ 644.169 23.4107 ``` """ -function stderr(x::Union{Symbol, Vector{Symbol}}, func::Function, design::ReplicateDesign{BootstrapReplicates}, args...; kwargs...) +function standarderror(x::Union{Symbol, Vector{Symbol}}, func::Function, design::ReplicateDesign{BootstrapReplicates}, args...; kwargs...) # Compute the estimators θs = func(design.data, x, design.weights, args...; kwargs...) diff --git a/src/jackknife.jl b/src/jackknife.jl index ccc89f1..b044e0f 100644 --- a/src/jackknife.jl +++ b/src/jackknife.jl @@ -83,7 +83,7 @@ function jackknifeweights(design::SurveyDesign) end """ -stderr(x::Symbol, func::Function, design::ReplicateDesign{JackknifeReplicates}) +standarderror(x::Symbol, func::Function, design::ReplicateDesign{JackknifeReplicates}) Compute standard error of column `x` for the given `func` using the Jackknife method. The formula to compute this variance is the following. @@ -94,11 +94,10 @@ Compute standard error of column `x` for the given `func` using the Jackknife me Above, ``\\hat{\\theta}`` represents the estimator computed using the original weights, and ``\\hat{\\theta_{(hj)}}`` represents the estimator computed from the replicate weights obtained when PSU ``j`` from cluster ``h`` is removed. # Examples -```jldoctest; setup = :(using Survey, StatsBase, DataFrames; apistrat = load_data("apistrat"); dstrat = SurveyDesign(apistrat; strata=:stype, weights=:pw); rstrat = jackknifeweights(dstrat);) +```jldoctest; setup = :(using Survey, StatsBase, DataFrames; apistrat = load_data("apistrat"); dstrat = SurveyDesign(apistrat; strata=:stype, weights=:pw); rstrat = jackknifeweights(dstrat)) +julia> my_mean(df::DataFrame, column, weights) = StatsBase.mean(df[!, column], StatsBase.weights(df[!, weights])); -julia> mean(df::DataFrame, column, weights) = StatsBase.mean(df[!, column], StatsBase.weights(df[!, weights])); - -julia> stderr(:api00, mean, rstrat) +julia> Survey.standarderror(:api00, my_mean, rstrat) 1×2 DataFrame Row │ estimator SE │ Float64 Float64 @@ -108,7 +107,7 @@ julia> stderr(:api00, mean, rstrat) # Reference pg 380-382, Section 9.3.2 Jackknife - Sharon Lohr, Sampling Design and Analysis (2010) """ -function stderr(x::Union{Symbol, Vector{Symbol}}, func::Function, design::ReplicateDesign{JackknifeReplicates}, args...; kwargs...) +function standarderror(x::Union{Symbol, Vector{Symbol}}, func::Function, design::ReplicateDesign{JackknifeReplicates}, args...; kwargs...) df = design.data stratified_gdf = groupby(df, design.strata) diff --git a/src/mean.jl b/src/mean.jl index 2d56437..f203cd0 100644 --- a/src/mean.jl +++ b/src/mean.jl @@ -44,8 +44,7 @@ Compute the standard error of the estimated mean using replicate weights. # Examples -```jldoctest; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights;) - +```jldoctest; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights) julia> mean(:api00, bclus1) 1×2 DataFrame Row │ mean SE @@ -62,7 +61,7 @@ function mean(x::Symbol, design::ReplicateDesign) end # Calculate the mean and standard error - df = Survey.stderr(x, inner_mean, design) + df = Survey.standarderror(x, inner_mean, design) rename!(df, :estimator => :mean) @@ -108,40 +107,40 @@ Estimate means of domains. ```jldoctest meanlabel; setup = :(apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights) julia> mean(:api00, :cname, dclus1) 11×2 DataFrame - Row │ cname mean - │ String15 Float64 + Row │ mean cname + │ Float64 String ─────┼────────────────────── - 1 │ Alameda 669.0 - 2 │ Fresno 472.0 - 3 │ Kern 452.5 - 4 │ Los Angeles 647.267 - 5 │ Mendocino 623.25 - 6 │ Merced 519.25 - 7 │ Orange 710.563 - 8 │ Plumas 709.556 - 9 │ San Diego 659.436 - 10 │ San Joaquin 551.189 - 11 │ Santa Clara 732.077 + 1 │ 669.0 Alameda + 2 │ 472.0 Fresno + 3 │ 452.5 Kern + 4 │ 647.267 Los Angeles + 5 │ 623.25 Mendocino + 6 │ 519.25 Merced + 7 │ 710.563 Orange + 8 │ 709.556 Plumas + 9 │ 659.436 San Diego + 10 │ 551.189 San Joaquin + 11 │ 732.077 Santa Clara ``` Use the replicate design to compute standard errors of the estimated means. ```jldoctest meanlabel julia> mean(:api00, :cname, bclus1) 11×3 DataFrame - Row │ cname mean SE - │ String15 Float64 Float64 + Row │ mean SE cname + │ Float64 Float64 String ─────┼──────────────────────────────────── - 1 │ Santa Clara 732.077 58.2169 - 2 │ San Diego 659.436 2.66703 - 3 │ Merced 519.25 2.28936e-15 - 4 │ Los Angeles 647.267 47.6233 - 5 │ Orange 710.563 2.19826e-13 - 6 │ Fresno 472.0 1.13687e-13 - 7 │ Plumas 709.556 1.26058e-13 - 8 │ Alameda 669.0 1.27527e-13 - 9 │ San Joaquin 551.189 2.1791e-13 - 10 │ Kern 452.5 0.0 - 11 │ Mendocino 623.25 1.09545e-13 + 1 │ 732.077 58.2169 Santa Clara + 2 │ 659.436 2.66703 San Diego + 3 │ 519.25 2.28936e-15 Merced + 4 │ 647.267 47.6233 Los Angeles + 5 │ 710.563 2.19826e-13 Orange + 6 │ 472.0 1.13687e-13 Fresno + 7 │ 709.556 1.26058e-13 Plumas + 8 │ 669.0 1.27527e-13 Alameda + 9 │ 551.189 2.18162e-13 San Joaquin + 10 │ 452.5 0.0 Kern + 11 │ 623.25 1.09545e-13 Mendocino ``` """ function mean(x::Symbol, domain, design::AbstractSurveyDesign) diff --git a/src/quantile.jl b/src/quantile.jl index 0d34a03..c6574be 100644 --- a/src/quantile.jl +++ b/src/quantile.jl @@ -48,8 +48,7 @@ Compute the standard error of the estimated quantile using replicate weights. # Examples -```jldoctest; setup = :(using Survey, StatsBase; apisrs = load_data("apisrs"); srs = SurveyDesign(apisrs; weights=:pw); bsrs = srs |> bootweights;) - +```jldoctest; setup = :(using Survey, StatsBase; apisrs = load_data("apisrs"); srs = SurveyDesign(apisrs; weights=:pw); bsrs = srs |> bootweights) julia> quantile(:api00, bsrs, 0.5) 1×2 DataFrame Row │ 0.5th percentile SE @@ -66,7 +65,7 @@ function quantile(x::Symbol, design::ReplicateDesign, p::Real; kwargs...) end # Calculate the quantile and standard error - df = stderr(x, inner_quantile, design) + df = standarderror(x, inner_quantile, design) rename!(df, :estimator => string(p) * "th percentile") @@ -138,11 +137,11 @@ quantile(var, domain, design) Estimate a quantile of domains. -```jldoctest meanlabel; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights;) +```jldoctest meanlabel; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights) julia> quantile(:api00, :cname, dclus1, 0.5) 11×2 DataFrame - Row │ 0.5th percentile cname - │ Float64 String15 + Row │ 0.5th percentile cname + │ Float64 String ─────┼─────────────────────────────── 1 │ 669.0 Alameda 2 │ 474.5 Fresno diff --git a/src/ratio.jl b/src/ratio.jl index 417696f..b6cac14 100644 --- a/src/ratio.jl +++ b/src/ratio.jl @@ -8,7 +8,7 @@ julia> apiclus1 = load_data("apiclus1"); julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); -julia> ratio(:api00, :enroll, dclus1) +julia> ratio([:api00, :enroll], dclus1) 1×1 DataFrame Row │ ratio │ Float64 @@ -40,7 +40,6 @@ Compute the standard error of the ratio using replicate weights. # Examples ```jldoctest; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = bootweights(dclus1);) - julia> ratio([:api00, :api99], bclus1) 1×2 DataFrame Row │ estimator SE @@ -58,8 +57,8 @@ function ratio(x::Vector{Symbol}, design::ReplicateDesign) return sum(df[!, columns[1]], StatsBase.weights(df[!, weights_column])) / sum(df[!, columns[2]], StatsBase.weights(df[!, weights_column])) end - # Calculate the standard error using the `stderr` function with the inner function - return stderr([variable_num, variable_den], inner_ratio, design) + # Calculate the standard error using the `standarderror` function with the inner function + return standarderror([variable_num, variable_den], inner_ratio, design) end """ @@ -67,11 +66,11 @@ end Estimate ratios of domains. -```jldoctest ratiolabel; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights;) +```jldoctest ratiolabel; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights) julia> ratio([:api00, :api99], :cname, dclus1) 11×2 DataFrame - Row │ ratio cname - │ Float64 String15 + Row │ ratio cname + │ Float64 String ─────┼────────────────────── 1 │ 1.09852 Alameda 2 │ 1.17779 Fresno diff --git a/src/reg.jl b/src/reg.jl index 7ba177f..de8233e 100644 --- a/src/reg.jl +++ b/src/reg.jl @@ -43,5 +43,5 @@ function glm(formula::FormulaTerm, design::ReplicateDesign, args...; kwargs...) end # Compute standard error of coefficients - stderr(columns, inner_glm, design, args...; kwargs...) + standarderror(columns, inner_glm, design, args...; kwargs...) end \ No newline at end of file diff --git a/src/total.jl b/src/total.jl index ced0e44..a460ab5 100644 --- a/src/total.jl +++ b/src/total.jl @@ -35,7 +35,7 @@ Compute the standard error of the estimated total using replicate weights. # Examples -```jldoctest; setup = :(using Survey; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights;) +```jldoctest; setup = :(using Survey; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights) julia> total(:api00, bclus1) 1×2 DataFrame Row │ total SE @@ -52,7 +52,7 @@ function total(x::Symbol, design::ReplicateDesign) end # Calculate the total and standard error - df = stderr(x, inner_total, design) + df = standarderror(x, inner_total, design) rename!(df, :estimator => :total) @@ -95,43 +95,43 @@ end Estimate population totals of domains. -```jldoctest totallabel; setup = :(using Survey; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights;) +```jldoctest totallabel; setup = :(using Survey; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights) julia> total(:api00, :cname, dclus1) 11×2 DataFrame - Row │ cname total - │ String15 Float64 + Row │ total cname + │ Float64 String ─────┼───────────────────────────── - 1 │ Alameda 249080.0 - 2 │ Fresno 63903.1 - 3 │ Kern 30631.5 - 4 │ Los Angeles 3.2862e5 - 5 │ Mendocino 84380.6 - 6 │ Merced 70300.2 - 7 │ Orange 3.84807e5 - 8 │ Plumas 2.16147e5 - 9 │ San Diego 1.2276e6 - 10 │ San Joaquin 6.90276e5 - 11 │ Santa Clara 6.44244e5 + 1 │ 249080.0 Alameda + 2 │ 63903.1 Fresno + 3 │ 30631.5 Kern + 4 │ 3.2862e5 Los Angeles + 5 │ 84380.6 Mendocino + 6 │ 70300.2 Merced + 7 │ 3.84807e5 Orange + 8 │ 2.16147e5 Plumas + 9 │ 1.2276e6 San Diego + 10 │ 6.90276e5 San Joaquin + 11 │ 6.44244e5 Santa Clara ``` Use the replicate design to compute standard errors of the estimated totals. ```jldoctest totallabel julia> total(:api00, :cname, bclus1) 11×3 DataFrame - Row │ cname total SE - │ String15 Float64 Float64 + Row │ total SE cname + │ Float64 Float64 String ─────┼──────────────────────────────────────────── - 1 │ Santa Clara 6.44244e5 4.2273e5 - 2 │ San Diego 1.2276e6 8.62727e5 - 3 │ Merced 70300.2 71336.3 - 4 │ Los Angeles 3.2862e5 2.93936e5 - 5 │ Orange 3.84807e5 3.88014e5 - 6 │ Fresno 63903.1 64781.7 - 7 │ Plumas 2.16147e5 2.12089e5 - 8 │ Alameda 249080.0 2.49228e5 - 9 │ San Joaquin 6.90276e5 6.81604e5 - 10 │ Kern 30631.5 30870.3 - 11 │ Mendocino 84380.6 80215.9 + 1 │ 6.44244e5 4.2273e5 Santa Clara + 2 │ 1.2276e6 8.62727e5 San Diego + 3 │ 70300.2 71336.3 Merced + 4 │ 3.2862e5 2.93936e5 Los Angeles + 5 │ 3.84807e5 3.88014e5 Orange + 6 │ 63903.1 64781.7 Fresno + 7 │ 2.16147e5 2.12089e5 Plumas + 8 │ 249080.0 2.49228e5 Alameda + 9 │ 6.90276e5 6.81604e5 San Joaquin + 10 │ 30631.5 30870.3 Kern + 11 │ 84380.6 80215.9 Mendocino ``` """ function total(x::Symbol, domain, design::AbstractSurveyDesign)