From 65446be3ca4d8fa84164c9231a520c617d662479 Mon Sep 17 00:00:00 2001 From: ayushpatnaikgit Date: Thu, 21 Dec 2023 22:29:12 +0530 Subject: [PATCH 1/5] Remove extra semicolon --- src/bootstrap.jl | 3 +-- src/mean.jl | 3 +-- src/quantile.jl | 5 ++--- src/ratio.jl | 3 +-- 4 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/bootstrap.jl b/src/bootstrap.jl index 956c79b..0f249f9 100644 --- a/src/bootstrap.jl +++ b/src/bootstrap.jl @@ -85,8 +85,7 @@ where above ``R`` is the number of replicate weights, ``\\theta_i`` is the estim # Examples -```jldoctest; setup = :(using Survey, StatsBase, DataFrames; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights;) - +```jldoctest; setup = :(using Survey, StatsBase, DataFrames; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights) julia> mean(df::DataFrame, column, weights) = StatsBase.mean(df[!, column], StatsBase.weights(df[!, weights])); julia> stderr(:api00, mean, bclus1) diff --git a/src/mean.jl b/src/mean.jl index 2d56437..c41f45f 100644 --- a/src/mean.jl +++ b/src/mean.jl @@ -44,8 +44,7 @@ Compute the standard error of the estimated mean using replicate weights. # Examples -```jldoctest; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights;) - +```jldoctest; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights) julia> mean(:api00, bclus1) 1×2 DataFrame Row │ mean SE diff --git a/src/quantile.jl b/src/quantile.jl index 0d34a03..e4b5df0 100644 --- a/src/quantile.jl +++ b/src/quantile.jl @@ -48,8 +48,7 @@ Compute the standard error of the estimated quantile using replicate weights. # Examples -```jldoctest; setup = :(using Survey, StatsBase; apisrs = load_data("apisrs"); srs = SurveyDesign(apisrs; weights=:pw); bsrs = srs |> bootweights;) - +```jldoctest; setup = :(using Survey, StatsBase; apisrs = load_data("apisrs"); srs = SurveyDesign(apisrs; weights=:pw); bsrs = srs |> bootweights) julia> quantile(:api00, bsrs, 0.5) 1×2 DataFrame Row │ 0.5th percentile SE @@ -138,7 +137,7 @@ quantile(var, domain, design) Estimate a quantile of domains. -```jldoctest meanlabel; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights;) +```jldoctest meanlabel; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights) julia> quantile(:api00, :cname, dclus1, 0.5) 11×2 DataFrame Row │ 0.5th percentile cname diff --git a/src/ratio.jl b/src/ratio.jl index 417696f..054309a 100644 --- a/src/ratio.jl +++ b/src/ratio.jl @@ -40,7 +40,6 @@ Compute the standard error of the ratio using replicate weights. # Examples ```jldoctest; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = bootweights(dclus1);) - julia> ratio([:api00, :api99], bclus1) 1×2 DataFrame Row │ estimator SE @@ -67,7 +66,7 @@ end Estimate ratios of domains. -```jldoctest ratiolabel; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights;) +```jldoctest ratiolabel; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights) julia> ratio([:api00, :api99], :cname, dclus1) 11×2 DataFrame Row │ ratio cname From ccbe444e3be17b013ca57f0e0310bd069e9e7f8a Mon Sep 17 00:00:00 2001 From: ayushpatnaikgit Date: Fri, 22 Dec 2023 09:50:52 +0530 Subject: [PATCH 2/5] Can't use the name stderror --- docs/src/api.md | 2 +- src/bootstrap.jl | 8 ++++---- src/jackknife.jl | 11 +++++------ src/mean.jl | 2 +- src/quantile.jl | 2 +- src/ratio.jl | 6 +++--- src/reg.jl | 2 +- src/total.jl | 6 +++--- 8 files changed, 19 insertions(+), 20 deletions(-) diff --git a/docs/src/api.md b/docs/src/api.md index 729f90f..1e0cbd5 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -17,7 +17,7 @@ JackknifeReplicates load_data bootweights jackknifeweights -stderr +standarderror mean total quantile diff --git a/src/bootstrap.jl b/src/bootstrap.jl index 0f249f9..81b82bf 100644 --- a/src/bootstrap.jl +++ b/src/bootstrap.jl @@ -61,7 +61,7 @@ function bootweights(design::SurveyDesign; replicates = 4000, rng = MersenneTwis end """ -stderr(x::Union{Symbol, Vector{Symbol}}, func::Function, design::ReplicateDesign{BootstrapReplicates}, args...; kwargs...) +standarderror(x::Union{Symbol, Vector{Symbol}}, func::Function, design::ReplicateDesign{BootstrapReplicates}, args...; kwargs...) Compute the standard error of the estimated mean using the bootstrap method. @@ -86,9 +86,9 @@ where above ``R`` is the number of replicate weights, ``\\theta_i`` is the estim # Examples ```jldoctest; setup = :(using Survey, StatsBase, DataFrames; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights) -julia> mean(df::DataFrame, column, weights) = StatsBase.mean(df[!, column], StatsBase.weights(df[!, weights])); +julia> my_mean(df::DataFrame, column, weights) = StatsBase.mean(df[!, column], StatsBase.weights(df[!, weights])); -julia> stderr(:api00, mean, bclus1) +julia> Survey.standarderror(:api00, my_mean, bclus1) 1×2 DataFrame Row │ estimator SE │ Float64 Float64 @@ -96,7 +96,7 @@ julia> stderr(:api00, mean, bclus1) 1 │ 644.169 23.4107 ``` """ -function stderr(x::Union{Symbol, Vector{Symbol}}, func::Function, design::ReplicateDesign{BootstrapReplicates}, args...; kwargs...) +function standarderror(x::Union{Symbol, Vector{Symbol}}, func::Function, design::ReplicateDesign{BootstrapReplicates}, args...; kwargs...) # Compute the estimators θs = func(design.data, x, design.weights, args...; kwargs...) diff --git a/src/jackknife.jl b/src/jackknife.jl index ccc89f1..b044e0f 100644 --- a/src/jackknife.jl +++ b/src/jackknife.jl @@ -83,7 +83,7 @@ function jackknifeweights(design::SurveyDesign) end """ -stderr(x::Symbol, func::Function, design::ReplicateDesign{JackknifeReplicates}) +standarderror(x::Symbol, func::Function, design::ReplicateDesign{JackknifeReplicates}) Compute standard error of column `x` for the given `func` using the Jackknife method. The formula to compute this variance is the following. @@ -94,11 +94,10 @@ Compute standard error of column `x` for the given `func` using the Jackknife me Above, ``\\hat{\\theta}`` represents the estimator computed using the original weights, and ``\\hat{\\theta_{(hj)}}`` represents the estimator computed from the replicate weights obtained when PSU ``j`` from cluster ``h`` is removed. # Examples -```jldoctest; setup = :(using Survey, StatsBase, DataFrames; apistrat = load_data("apistrat"); dstrat = SurveyDesign(apistrat; strata=:stype, weights=:pw); rstrat = jackknifeweights(dstrat);) +```jldoctest; setup = :(using Survey, StatsBase, DataFrames; apistrat = load_data("apistrat"); dstrat = SurveyDesign(apistrat; strata=:stype, weights=:pw); rstrat = jackknifeweights(dstrat)) +julia> my_mean(df::DataFrame, column, weights) = StatsBase.mean(df[!, column], StatsBase.weights(df[!, weights])); -julia> mean(df::DataFrame, column, weights) = StatsBase.mean(df[!, column], StatsBase.weights(df[!, weights])); - -julia> stderr(:api00, mean, rstrat) +julia> Survey.standarderror(:api00, my_mean, rstrat) 1×2 DataFrame Row │ estimator SE │ Float64 Float64 @@ -108,7 +107,7 @@ julia> stderr(:api00, mean, rstrat) # Reference pg 380-382, Section 9.3.2 Jackknife - Sharon Lohr, Sampling Design and Analysis (2010) """ -function stderr(x::Union{Symbol, Vector{Symbol}}, func::Function, design::ReplicateDesign{JackknifeReplicates}, args...; kwargs...) +function standarderror(x::Union{Symbol, Vector{Symbol}}, func::Function, design::ReplicateDesign{JackknifeReplicates}, args...; kwargs...) df = design.data stratified_gdf = groupby(df, design.strata) diff --git a/src/mean.jl b/src/mean.jl index c41f45f..e5a5615 100644 --- a/src/mean.jl +++ b/src/mean.jl @@ -61,7 +61,7 @@ function mean(x::Symbol, design::ReplicateDesign) end # Calculate the mean and standard error - df = Survey.stderr(x, inner_mean, design) + df = Survey.standarderror(x, inner_mean, design) rename!(df, :estimator => :mean) diff --git a/src/quantile.jl b/src/quantile.jl index e4b5df0..b025a54 100644 --- a/src/quantile.jl +++ b/src/quantile.jl @@ -65,7 +65,7 @@ function quantile(x::Symbol, design::ReplicateDesign, p::Real; kwargs...) end # Calculate the quantile and standard error - df = stderr(x, inner_quantile, design) + df = standarderror(x, inner_quantile, design) rename!(df, :estimator => string(p) * "th percentile") diff --git a/src/ratio.jl b/src/ratio.jl index 054309a..2e9c518 100644 --- a/src/ratio.jl +++ b/src/ratio.jl @@ -8,7 +8,7 @@ julia> apiclus1 = load_data("apiclus1"); julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); -julia> ratio(:api00, :enroll, dclus1) +julia> ratio([:api00, :enroll], dclus1) 1×1 DataFrame Row │ ratio │ Float64 @@ -57,8 +57,8 @@ function ratio(x::Vector{Symbol}, design::ReplicateDesign) return sum(df[!, columns[1]], StatsBase.weights(df[!, weights_column])) / sum(df[!, columns[2]], StatsBase.weights(df[!, weights_column])) end - # Calculate the standard error using the `stderr` function with the inner function - return stderr([variable_num, variable_den], inner_ratio, design) + # Calculate the standard error using the `standarderror` function with the inner function + return standarderror([variable_num, variable_den], inner_ratio, design) end """ diff --git a/src/reg.jl b/src/reg.jl index 7ba177f..de8233e 100644 --- a/src/reg.jl +++ b/src/reg.jl @@ -43,5 +43,5 @@ function glm(formula::FormulaTerm, design::ReplicateDesign, args...; kwargs...) end # Compute standard error of coefficients - stderr(columns, inner_glm, design, args...; kwargs...) + standarderror(columns, inner_glm, design, args...; kwargs...) end \ No newline at end of file diff --git a/src/total.jl b/src/total.jl index ced0e44..17712fd 100644 --- a/src/total.jl +++ b/src/total.jl @@ -35,7 +35,7 @@ Compute the standard error of the estimated total using replicate weights. # Examples -```jldoctest; setup = :(using Survey; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights;) +```jldoctest; setup = :(using Survey; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights) julia> total(:api00, bclus1) 1×2 DataFrame Row │ total SE @@ -52,7 +52,7 @@ function total(x::Symbol, design::ReplicateDesign) end # Calculate the total and standard error - df = stderr(x, inner_total, design) + df = standarderror(x, inner_total, design) rename!(df, :estimator => :total) @@ -95,7 +95,7 @@ end Estimate population totals of domains. -```jldoctest totallabel; setup = :(using Survey; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights;) +```jldoctest totallabel; setup = :(using Survey; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights) julia> total(:api00, :cname, dclus1) 11×2 DataFrame Row │ cname total From 012d01213d33a8b613f2b865a6cd833272d2fb8a Mon Sep 17 00:00:00 2001 From: ayushpatnaikgit Date: Fri, 22 Dec 2023 14:40:43 +0530 Subject: [PATCH 3/5] standarderror isn't exported --- docs/src/api.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/src/api.md b/docs/src/api.md index 1e0cbd5..01c7451 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -17,7 +17,7 @@ JackknifeReplicates load_data bootweights jackknifeweights -standarderror +Survey.standarderror mean total quantile @@ -26,4 +26,6 @@ glm plot boxplot hist +Survey.sturges +Survey.freedman_diaconis ``` From 4e2e367a0136aef805f46f2201a2426975dc0959 Mon Sep 17 00:00:00 2001 From: ayushpatnaikgit Date: Fri, 22 Dec 2023 14:41:04 +0530 Subject: [PATCH 4/5] Used fix = true to make these changes. --- src/mean.jl | 52 ++++++++++++++++++++++++------------------------- src/quantile.jl | 4 ++-- src/ratio.jl | 4 ++-- src/total.jl | 52 ++++++++++++++++++++++++------------------------- 4 files changed, 56 insertions(+), 56 deletions(-) diff --git a/src/mean.jl b/src/mean.jl index e5a5615..f203cd0 100644 --- a/src/mean.jl +++ b/src/mean.jl @@ -107,40 +107,40 @@ Estimate means of domains. ```jldoctest meanlabel; setup = :(apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights) julia> mean(:api00, :cname, dclus1) 11×2 DataFrame - Row │ cname mean - │ String15 Float64 + Row │ mean cname + │ Float64 String ─────┼────────────────────── - 1 │ Alameda 669.0 - 2 │ Fresno 472.0 - 3 │ Kern 452.5 - 4 │ Los Angeles 647.267 - 5 │ Mendocino 623.25 - 6 │ Merced 519.25 - 7 │ Orange 710.563 - 8 │ Plumas 709.556 - 9 │ San Diego 659.436 - 10 │ San Joaquin 551.189 - 11 │ Santa Clara 732.077 + 1 │ 669.0 Alameda + 2 │ 472.0 Fresno + 3 │ 452.5 Kern + 4 │ 647.267 Los Angeles + 5 │ 623.25 Mendocino + 6 │ 519.25 Merced + 7 │ 710.563 Orange + 8 │ 709.556 Plumas + 9 │ 659.436 San Diego + 10 │ 551.189 San Joaquin + 11 │ 732.077 Santa Clara ``` Use the replicate design to compute standard errors of the estimated means. ```jldoctest meanlabel julia> mean(:api00, :cname, bclus1) 11×3 DataFrame - Row │ cname mean SE - │ String15 Float64 Float64 + Row │ mean SE cname + │ Float64 Float64 String ─────┼──────────────────────────────────── - 1 │ Santa Clara 732.077 58.2169 - 2 │ San Diego 659.436 2.66703 - 3 │ Merced 519.25 2.28936e-15 - 4 │ Los Angeles 647.267 47.6233 - 5 │ Orange 710.563 2.19826e-13 - 6 │ Fresno 472.0 1.13687e-13 - 7 │ Plumas 709.556 1.26058e-13 - 8 │ Alameda 669.0 1.27527e-13 - 9 │ San Joaquin 551.189 2.1791e-13 - 10 │ Kern 452.5 0.0 - 11 │ Mendocino 623.25 1.09545e-13 + 1 │ 732.077 58.2169 Santa Clara + 2 │ 659.436 2.66703 San Diego + 3 │ 519.25 2.28936e-15 Merced + 4 │ 647.267 47.6233 Los Angeles + 5 │ 710.563 2.19826e-13 Orange + 6 │ 472.0 1.13687e-13 Fresno + 7 │ 709.556 1.26058e-13 Plumas + 8 │ 669.0 1.27527e-13 Alameda + 9 │ 551.189 2.18162e-13 San Joaquin + 10 │ 452.5 0.0 Kern + 11 │ 623.25 1.09545e-13 Mendocino ``` """ function mean(x::Symbol, domain, design::AbstractSurveyDesign) diff --git a/src/quantile.jl b/src/quantile.jl index b025a54..c6574be 100644 --- a/src/quantile.jl +++ b/src/quantile.jl @@ -140,8 +140,8 @@ Estimate a quantile of domains. ```jldoctest meanlabel; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights) julia> quantile(:api00, :cname, dclus1, 0.5) 11×2 DataFrame - Row │ 0.5th percentile cname - │ Float64 String15 + Row │ 0.5th percentile cname + │ Float64 String ─────┼─────────────────────────────── 1 │ 669.0 Alameda 2 │ 474.5 Fresno diff --git a/src/ratio.jl b/src/ratio.jl index 2e9c518..b6cac14 100644 --- a/src/ratio.jl +++ b/src/ratio.jl @@ -69,8 +69,8 @@ Estimate ratios of domains. ```jldoctest ratiolabel; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights) julia> ratio([:api00, :api99], :cname, dclus1) 11×2 DataFrame - Row │ ratio cname - │ Float64 String15 + Row │ ratio cname + │ Float64 String ─────┼────────────────────── 1 │ 1.09852 Alameda 2 │ 1.17779 Fresno diff --git a/src/total.jl b/src/total.jl index 17712fd..a460ab5 100644 --- a/src/total.jl +++ b/src/total.jl @@ -98,40 +98,40 @@ Estimate population totals of domains. ```jldoctest totallabel; setup = :(using Survey; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights) julia> total(:api00, :cname, dclus1) 11×2 DataFrame - Row │ cname total - │ String15 Float64 + Row │ total cname + │ Float64 String ─────┼───────────────────────────── - 1 │ Alameda 249080.0 - 2 │ Fresno 63903.1 - 3 │ Kern 30631.5 - 4 │ Los Angeles 3.2862e5 - 5 │ Mendocino 84380.6 - 6 │ Merced 70300.2 - 7 │ Orange 3.84807e5 - 8 │ Plumas 2.16147e5 - 9 │ San Diego 1.2276e6 - 10 │ San Joaquin 6.90276e5 - 11 │ Santa Clara 6.44244e5 + 1 │ 249080.0 Alameda + 2 │ 63903.1 Fresno + 3 │ 30631.5 Kern + 4 │ 3.2862e5 Los Angeles + 5 │ 84380.6 Mendocino + 6 │ 70300.2 Merced + 7 │ 3.84807e5 Orange + 8 │ 2.16147e5 Plumas + 9 │ 1.2276e6 San Diego + 10 │ 6.90276e5 San Joaquin + 11 │ 6.44244e5 Santa Clara ``` Use the replicate design to compute standard errors of the estimated totals. ```jldoctest totallabel julia> total(:api00, :cname, bclus1) 11×3 DataFrame - Row │ cname total SE - │ String15 Float64 Float64 + Row │ total SE cname + │ Float64 Float64 String ─────┼──────────────────────────────────────────── - 1 │ Santa Clara 6.44244e5 4.2273e5 - 2 │ San Diego 1.2276e6 8.62727e5 - 3 │ Merced 70300.2 71336.3 - 4 │ Los Angeles 3.2862e5 2.93936e5 - 5 │ Orange 3.84807e5 3.88014e5 - 6 │ Fresno 63903.1 64781.7 - 7 │ Plumas 2.16147e5 2.12089e5 - 8 │ Alameda 249080.0 2.49228e5 - 9 │ San Joaquin 6.90276e5 6.81604e5 - 10 │ Kern 30631.5 30870.3 - 11 │ Mendocino 84380.6 80215.9 + 1 │ 6.44244e5 4.2273e5 Santa Clara + 2 │ 1.2276e6 8.62727e5 San Diego + 3 │ 70300.2 71336.3 Merced + 4 │ 3.2862e5 2.93936e5 Los Angeles + 5 │ 3.84807e5 3.88014e5 Orange + 6 │ 63903.1 64781.7 Fresno + 7 │ 2.16147e5 2.12089e5 Plumas + 8 │ 249080.0 2.49228e5 Alameda + 9 │ 6.90276e5 6.81604e5 San Joaquin + 10 │ 30631.5 30870.3 Kern + 11 │ 84380.6 80215.9 Mendocino ``` """ function total(x::Symbol, domain, design::AbstractSurveyDesign) From 300192f88ebf3875a9770854322aeb1da9ae8e49 Mon Sep 17 00:00:00 2001 From: ayushpatnaikgit Date: Fri, 22 Dec 2023 15:41:34 +0530 Subject: [PATCH 5/5] Add GLM dependancy to the documentation --- docs/Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/Project.toml b/docs/Project.toml index f11e820..d40c66f 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -2,6 +2,7 @@ CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Survey = "c1a98b4d-6cd2-47ec-b9e9-69b59c35373c"