Skip to content

Commit

Permalink
Merge pull request #317 from ayushpatnaikgit/0.3.1
Browse files Browse the repository at this point in the history
Fix leftover bugs
  • Loading branch information
ayushpatnaikgit authored Dec 22, 2023
2 parents 253ead9 + 300192f commit 9da1fd1
Show file tree
Hide file tree
Showing 9 changed files with 83 additions and 85 deletions.
1 change: 1 addition & 0 deletions docs/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
Survey = "c1a98b4d-6cd2-47ec-b9e9-69b59c35373c"
4 changes: 3 additions & 1 deletion docs/src/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ JackknifeReplicates
load_data
bootweights
jackknifeweights
stderr
Survey.standarderror
mean
total
quantile
Expand All @@ -26,4 +26,6 @@ glm
plot
boxplot
hist
Survey.sturges
Survey.freedman_diaconis
```
11 changes: 5 additions & 6 deletions src/bootstrap.jl
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ function bootweights(design::SurveyDesign; replicates = 4000, rng = MersenneTwis
end

"""
stderr(x::Union{Symbol, Vector{Symbol}}, func::Function, design::ReplicateDesign{BootstrapReplicates}, args...; kwargs...)
standarderror(x::Union{Symbol, Vector{Symbol}}, func::Function, design::ReplicateDesign{BootstrapReplicates}, args...; kwargs...)
Compute the standard error of the estimator `func` applied to column(s) `x`, using the bootstrap method.
Expand All @@ -85,19 +85,18 @@ where above ``R`` is the number of replicate weights, ``\\theta_i`` is the estim
# Examples
```jldoctest; setup = :(using Survey, StatsBase, DataFrames; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights;)
```jldoctest; setup = :(using Survey, StatsBase, DataFrames; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights)
julia> my_mean(df::DataFrame, column, weights) = StatsBase.mean(df[!, column], StatsBase.weights(df[!, weights]));
julia> mean(df::DataFrame, column, weights) = StatsBase.mean(df[!, column], StatsBase.weights(df[!, weights]));
julia> stderr(:api00, mean, bclus1)
julia> Survey.standarderror(:api00, my_mean, bclus1)
1×2 DataFrame
Row │ estimator SE
│ Float64 Float64
─────┼────────────────────
1 │ 644.169 23.4107
```
"""
function stderr(x::Union{Symbol, Vector{Symbol}}, func::Function, design::ReplicateDesign{BootstrapReplicates}, args...; kwargs...)
function standarderror(x::Union{Symbol, Vector{Symbol}}, func::Function, design::ReplicateDesign{BootstrapReplicates}, args...; kwargs...)

# Compute the estimators
θs = func(design.data, x, design.weights, args...; kwargs...)
Expand Down
11 changes: 5 additions & 6 deletions src/jackknife.jl
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ function jackknifeweights(design::SurveyDesign)
end

"""
stderr(x::Symbol, func::Function, design::ReplicateDesign{JackknifeReplicates})
standarderror(x::Union{Symbol, Vector{Symbol}}, func::Function, design::ReplicateDesign{JackknifeReplicates}, args...; kwargs...)
Compute the standard error of column `x` for the given `func` using the Jackknife method. The standard error is the square root of the variance, which is computed with the following formula.
Expand All @@ -94,11 +94,10 @@ Compute standard error of column `x` for the given `func` using the Jackknife me
Above, ``\\hat{\\theta}`` represents the estimator computed using the original weights, and ``\\hat{\\theta_{(hj)}}`` represents the estimator computed from the replicate weights obtained when PSU ``j`` from stratum ``h`` is removed.
# Examples
```jldoctest; setup = :(using Survey, StatsBase, DataFrames; apistrat = load_data("apistrat"); dstrat = SurveyDesign(apistrat; strata=:stype, weights=:pw); rstrat = jackknifeweights(dstrat);)
```jldoctest; setup = :(using Survey, StatsBase, DataFrames; apistrat = load_data("apistrat"); dstrat = SurveyDesign(apistrat; strata=:stype, weights=:pw); rstrat = jackknifeweights(dstrat))
julia> my_mean(df::DataFrame, column, weights) = StatsBase.mean(df[!, column], StatsBase.weights(df[!, weights]));
julia> mean(df::DataFrame, column, weights) = StatsBase.mean(df[!, column], StatsBase.weights(df[!, weights]));
julia> stderr(:api00, mean, rstrat)
julia> Survey.standarderror(:api00, my_mean, rstrat)
1×2 DataFrame
Row │ estimator SE
│ Float64 Float64
Expand All @@ -108,7 +107,7 @@ julia> stderr(:api00, mean, rstrat)
# Reference
pp. 380-382, Section 9.3.2 (Jackknife), in Sharon Lohr, Sampling: Design and Analysis (2010)
"""
function stderr(x::Union{Symbol, Vector{Symbol}}, func::Function, design::ReplicateDesign{JackknifeReplicates}, args...; kwargs...)
function standarderror(x::Union{Symbol, Vector{Symbol}}, func::Function, design::ReplicateDesign{JackknifeReplicates}, args...; kwargs...)

df = design.data
stratified_gdf = groupby(df, design.strata)
Expand Down
57 changes: 28 additions & 29 deletions src/mean.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,7 @@ Compute the standard error of the estimated mean using replicate weights.
# Examples
```jldoctest; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights;)
```jldoctest; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights)
julia> mean(:api00, bclus1)
1×2 DataFrame
Row │ mean SE
Expand All @@ -62,7 +61,7 @@ function mean(x::Symbol, design::ReplicateDesign)
end

# Calculate the mean and standard error
df = Survey.stderr(x, inner_mean, design)
df = Survey.standarderror(x, inner_mean, design)

rename!(df, :estimator => :mean)

Expand Down Expand Up @@ -108,40 +107,40 @@ Estimate means of domains.
```jldoctest meanlabel; setup = :(apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights)
julia> mean(:api00, :cname, dclus1)
11×2 DataFrame
Row │ cname mean
String15 Float64
Row │ mean cname
│ Float64 String
─────┼──────────────────────
1 │ Alameda 669.0
2 │ Fresno 472.0
3 │ Kern 452.5
4 │ Los Angeles 647.267
5 │ Mendocino 623.25
6 │ Merced 519.25
7 │ Orange 710.563
8 │ Plumas 709.556
9 │ San Diego 659.436
10 │ San Joaquin 551.189
11 │ Santa Clara 732.077
1 │ 669.0 Alameda
2 │ 472.0 Fresno
3 │ 452.5 Kern
4 │ 647.267 Los Angeles
5 │ 623.25 Mendocino
6 │ 519.25 Merced
7 │ 710.563 Orange
8 │ 709.556 Plumas
9 │ 659.436 San Diego
10 │ 551.189 San Joaquin
11 │ 732.077 Santa Clara
```
Use the replicate design to compute standard errors of the estimated means.
```jldoctest meanlabel
julia> mean(:api00, :cname, bclus1)
11×3 DataFrame
Row │ cname mean SE
String15 Float64 Float64
Row │ mean SE cname
│ Float64 Float64 String
─────┼────────────────────────────────────
1 │ Santa Clara 732.077 58.2169
2 │ San Diego 659.436 2.66703
3 │ Merced 519.25 2.28936e-15
4 │ Los Angeles 647.267 47.6233
5 │ Orange 710.563 2.19826e-13
6 │ Fresno 472.0 1.13687e-13
7 │ Plumas 709.556 1.26058e-13
8 │ Alameda 669.0 1.27527e-13
9 │ San Joaquin 551.189 2.1791e-13
10 │ Kern 452.5 0.0
11 │ Mendocino 623.25 1.09545e-13
1 │ 732.077 58.2169 Santa Clara
2 │ 659.436 2.66703 San Diego
3 │ 519.25 2.28936e-15 Merced
4 │ 647.267 47.6233 Los Angeles
5 │ 710.563 2.19826e-13 Orange
6 │ 472.0 1.13687e-13 Fresno
7 │ 709.556 1.26058e-13 Plumas
8 │ 669.0 1.27527e-13 Alameda
9 │ 551.189 2.18162e-13 San Joaquin
10 │ 452.5 0.0 Kern
11 │ 623.25 1.09545e-13 Mendocino
```
"""
function mean(x::Symbol, domain, design::AbstractSurveyDesign)
Expand Down
11 changes: 5 additions & 6 deletions src/quantile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,7 @@ Compute the standard error of the estimated quantile using replicate weights.
# Examples
```jldoctest; setup = :(using Survey, StatsBase; apisrs = load_data("apisrs"); srs = SurveyDesign(apisrs; weights=:pw); bsrs = srs |> bootweights;)
```jldoctest; setup = :(using Survey, StatsBase; apisrs = load_data("apisrs"); srs = SurveyDesign(apisrs; weights=:pw); bsrs = srs |> bootweights)
julia> quantile(:api00, bsrs, 0.5)
1×2 DataFrame
Row │ 0.5th percentile SE
Expand All @@ -66,7 +65,7 @@ function quantile(x::Symbol, design::ReplicateDesign, p::Real; kwargs...)
end

# Calculate the quantile and standard error
df = stderr(x, inner_quantile, design)
df = standarderror(x, inner_quantile, design)

rename!(df, :estimator => string(p) * "th percentile")

Expand Down Expand Up @@ -138,11 +137,11 @@ quantile(var, domain, design)
Estimate a quantile of domains.
```jldoctest meanlabel; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights;)
```jldoctest meanlabel; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights)
julia> quantile(:api00, :cname, dclus1, 0.5)
11×2 DataFrame
Row │ 0.5th percentile cname
│ Float64 String15
Row │ 0.5th percentile cname
│ Float64 String
─────┼───────────────────────────────
1 │ 669.0 Alameda
2 │ 474.5 Fresno
Expand Down
13 changes: 6 additions & 7 deletions src/ratio.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ julia> apiclus1 = load_data("apiclus1");
julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw);
julia> ratio(:api00, :enroll, dclus1)
julia> ratio([:api00, :enroll], dclus1)
1×1 DataFrame
Row │ ratio
│ Float64
Expand Down Expand Up @@ -40,7 +40,6 @@ Compute the standard error of the ratio using replicate weights.
# Examples
```jldoctest; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = bootweights(dclus1);)
julia> ratio([:api00, :api99], bclus1)
1×2 DataFrame
Row │ estimator SE
Expand All @@ -58,20 +57,20 @@ function ratio(x::Vector{Symbol}, design::ReplicateDesign)
return sum(df[!, columns[1]], StatsBase.weights(df[!, weights_column])) / sum(df[!, columns[2]], StatsBase.weights(df[!, weights_column]))
end

# Calculate the standard error using the `stderr` function with the inner function
return stderr([variable_num, variable_den], inner_ratio, design)
# Calculate the standard error using the `standarderror` function with the inner function
return standarderror([variable_num, variable_den], inner_ratio, design)
end

"""
ratio(var, domain, design)
Estimate ratios of domains.
```jldoctest ratiolabel; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights;)
```jldoctest ratiolabel; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights)
julia> ratio([:api00, :api99], :cname, dclus1)
11×2 DataFrame
Row │ ratio cname
│ Float64 String15
Row │ ratio cname
│ Float64 String
─────┼──────────────────────
1 │ 1.09852 Alameda
2 │ 1.17779 Fresno
Expand Down
2 changes: 1 addition & 1 deletion src/reg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,5 +43,5 @@ function glm(formula::FormulaTerm, design::ReplicateDesign, args...; kwargs...)
end

# Compute standard error of coefficients
stderr(columns, inner_glm, design, args...; kwargs...)
standarderror(columns, inner_glm, design, args...; kwargs...)
end
58 changes: 29 additions & 29 deletions src/total.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ Compute the standard error of the estimated total using replicate weights.
# Examples
```jldoctest; setup = :(using Survey; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights;)
```jldoctest; setup = :(using Survey; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights)
julia> total(:api00, bclus1)
1×2 DataFrame
Row │ total SE
Expand All @@ -52,7 +52,7 @@ function total(x::Symbol, design::ReplicateDesign)
end

# Calculate the total and standard error
df = stderr(x, inner_total, design)
df = standarderror(x, inner_total, design)

rename!(df, :estimator => :total)

Expand Down Expand Up @@ -95,43 +95,43 @@ end
Estimate population totals of domains.
```jldoctest totallabel; setup = :(using Survey; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights;)
```jldoctest totallabel; setup = :(using Survey; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights)
julia> total(:api00, :cname, dclus1)
11×2 DataFrame
Row │ cname total
String15 Float64
Row │ total cname
│ Float64 String
─────┼─────────────────────────────
1 │ Alameda 249080.0
2 │ Fresno 63903.1
3 │ Kern 30631.5
4 │ Los Angeles 3.2862e5
5 │ Mendocino 84380.6
6 │ Merced 70300.2
7 │ Orange 3.84807e5
8 │ Plumas 2.16147e5
9 │ San Diego 1.2276e6
10 │ San Joaquin 6.90276e5
11 │ Santa Clara 6.44244e5
1 │ 249080.0 Alameda
2 │ 63903.1 Fresno
3 │ 30631.5 Kern
4 │ 3.2862e5 Los Angeles
5 │ 84380.6 Mendocino
6 │ 70300.2 Merced
7 │ 3.84807e5 Orange
8 │ 2.16147e5 Plumas
9 │ 1.2276e6 San Diego
10 │ 6.90276e5 San Joaquin
11 │ 6.44244e5 Santa Clara
```
Use the replicate design to compute standard errors of the estimated totals.
```jldoctest totallabel
julia> total(:api00, :cname, bclus1)
11×3 DataFrame
Row │ cname total SE
String15 Float64 Float64
Row │ total SE cname
│ Float64 Float64 String
─────┼────────────────────────────────────────────
1 │ Santa Clara 6.44244e5 4.2273e5
2 │ San Diego 1.2276e6 8.62727e5
3 │ Merced 70300.2 71336.3
4 │ Los Angeles 3.2862e5 2.93936e5
5 │ Orange 3.84807e5 3.88014e5
6 │ Fresno 63903.1 64781.7
7 │ Plumas 2.16147e5 2.12089e5
8 │ Alameda 249080.0 2.49228e5
9 │ San Joaquin 6.90276e5 6.81604e5
10 │ Kern 30631.5 30870.3
11 │ Mendocino 84380.6 80215.9
1 │ 6.44244e5 4.2273e5 Santa Clara
2 │ 1.2276e6 8.62727e5 San Diego
3 │ 70300.2 71336.3 Merced
4 │ 3.2862e5 2.93936e5 Los Angeles
5 │ 3.84807e5 3.88014e5 Orange
6 │ 63903.1 64781.7 Fresno
7 │ 2.16147e5 2.12089e5 Plumas
8 │ 249080.0 2.49228e5 Alameda
9 │ 6.90276e5 6.81604e5 San Joaquin
10 │ 30631.5 30870.3 Kern
11 │ 84380.6 80215.9 Mendocino
```
"""
function total(x::Symbol, domain, design::AbstractSurveyDesign)
Expand Down

0 comments on commit 9da1fd1

Please sign in to comment.