Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update case study of hierarchical models for binary trials #214

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions knitr/pool-binary-trials/hier-logit-centered.stan
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,5 @@ model {
y ~ binomial_logit(K, alpha); // likelihood
}
generated quantities {
vector[N] theta; // chance of success

for (n in 1 : N) {
theta[n] = inv_logit(alpha[n]);
}
vector[N] theta = inv_logit(alpha);
}
100 changes: 8 additions & 92 deletions knitr/pool-binary-trials/hier-logit.stan
Original file line number Diff line number Diff line change
@@ -1,106 +1,22 @@
data {
int<lower=0> N; // items
array[N] int<lower=0> K; // initial trials
array[N] int<lower=0> y; // initial successes

array[N] int<lower=0> K_new; // new trials
array[N] int<lower=0> y_new; // new successes
}
transformed data {
real min_y; // minimum successes
real max_y; // maximum successes
real mean_y; // sample mean successes
real sd_y; // sample std dev successes

min_y = min(y);
max_y = max(y);
mean_y = mean(to_vector(y));
sd_y = sd(to_vector(y));
}
#include data-blocks.stan
parameters {
real mu; // population mean of success log-odds
real<lower=0> sigma; // population sd of success log-odds
vector[N] alpha_std; // success log-odds (standardized)
vector<offset=mu, multiplier=sigma>[N] alpha_std; // success log-odds (non-centered via affine offset/multiplier)
}
model {
mu ~ normal(-1, 1); // hyperprior
sigma ~ normal(0, 1); // hyperprior
alpha_std ~ normal(0, 1); // prior (hierarchical)
y ~ binomial_logit(K, mu + sigma * alpha_std); // likelihood
alpha_std ~ normal(mu, sigma); // prior (hierarchical)
y ~ binomial_logit(K, alpha_std); // likelihood
}
generated quantities {
vector[N] theta; // chance of success

real log_p_new; // posterior predictive log density remaining trials

array[N] int<lower=0> z; // posterior prediction remaining trials

int<lower=0, upper=1> some_ability_gt_350; // Pr[some theta > 0.35]
array[N] int<lower=0, upper=1> avg_gt_400; // Pr[season avg of n] >= 0.400
array[N] int<lower=0, upper=1> ability_gt_400; // Pr[chance-of-success of n] >= 0.400

array[N] int<lower=1, upper=N> rnk; // rank of player n
array[N] int<lower=0, upper=1> is_best; // Pr[player n highest chance of success]

array[N] int<lower=0> y_rep; // replications for existing items
vector[N] theta = inv_logit(alpha_std);
#include gq-postpred.stan
#include gq-ranking.stan

array[N] int<lower=0> y_pop_rep; // replications for simulated items

real min_y_rep; // posterior predictive min replicated successes
real max_y_rep; // posterior predictive max replicated successes
real mean_y_rep; // posterior predictive sample mean replicated successes
real sd_y_rep; // posterior predictive sample std dev replicated successes

int p_min; // posterior predictive p-values
int p_max;
int p_mean;
int p_sd;

for (n in 1 : N) {
theta[n] = inv_logit(mu + sigma * alpha_std[n]);
}

log_p_new = 0;
for (n in 1 : N) {
log_p_new = log_p_new + binomial_lpmf(y_new[n] | K_new[n], theta[n]);
}

for (n in 1 : N) {
z[n] = binomial_rng(K_new[n], theta[n]);
}

some_ability_gt_350 = max(theta) > 0.35;
for (n in 1 : N) {
avg_gt_400[n] = ((y[n] + z[n]) / (0.0 + K[n] + K_new[n])) > 0.400;
}
for (n in 1 : N) {
ability_gt_400[n] = theta[n] > 0.400;
}

{
array[N] int dsc;
dsc = sort_indices_desc(theta);
for (n in 1 : N) {
rnk[dsc[n]] = n;
}
}
for (n in 1 : N) {
is_best[n] = rnk[n] == 1;
}

for (n in 1 : N) {
y_rep[n] = binomial_rng(K[n], theta[n]);
}
for (n in 1 : N) {
y_pop_rep[n] = binomial_rng(K[n], inv_logit(normal_rng(mu, sigma)));
}

min_y_rep = min(y_rep);
max_y_rep = max(y_rep);
mean_y_rep = mean(to_vector(y_rep));
sd_y_rep = sd(to_vector(y_rep));

p_min = min_y_rep >= min_y;
p_max = max_y_rep >= max_y;
p_mean = mean_y_rep >= mean_y;
p_sd = sd_y_rep >= sd_y;
}
90 changes: 6 additions & 84 deletions knitr/pool-binary-trials/hier.stan
Original file line number Diff line number Diff line change
@@ -1,22 +1,4 @@
data {
int<lower=0> N; // items
array[N] int<lower=0> K; // initial trials
array[N] int<lower=0> y; // initial successes

array[N] int<lower=0> K_new; // new trials
array[N] int<lower=0> y_new; // new successes
}
transformed data {
real min_y; // minimum successes
real max_y; // maximum successes
real mean_y; // sample mean successes
real sd_y; // sample std dev successes

min_y = min(y);
max_y = max(y);
mean_y = mean(to_vector(y));
sd_y = sd(to_vector(y));
}
#include data-blocks.stan
parameters {
real<lower=0, upper=1> phi; // population chance of success
real<lower=1> kappa; // population concentration
Expand All @@ -28,73 +10,13 @@ model {
y ~ binomial(K, theta); // likelihood
}
generated quantities {
real log_p_new; // posterior predictive log density remaining trials

array[N] int<lower=0> z; // posterior prediction remaining trials

int<lower=0, upper=1> some_ability_gt_350; // Pr[some theta > 0.35]
array[N] int<lower=0, upper=1> avg_gt_400; // Pr[season avg of n] >= 0.400
array[N] int<lower=0, upper=1> ability_gt_400; // Pr[chance-of-success of n] >= 0.400

array[N] int<lower=1, upper=N> rnk; // rank of player n
array[N] int<lower=0, upper=1> is_best; // Pr[player n highest chance of success]

array[N] int<lower=0> y_rep; // replications for existing items
array[N] int<lower=0> y_pop_rep; // replications for simulated items

real min_y_rep; // posterior predictive min replicated successes
real max_y_rep; // posterior predictive max replicated successes
real mean_y_rep; // posterior predictive sample mean replicated successes
real sd_y_rep; // posterior predictive sample std dev replicated successes

int p_min; // posterior predictive p-values
int p_max;
int p_mean;
int p_sd;

log_p_new = 0;
for (n in 1 : N) {
log_p_new = log_p_new + binomial_lpmf(y_new[n] | K_new[n], theta[n]);
}

for (n in 1 : N) {
z[n] = binomial_rng(K_new[n], theta[n]);
}

some_ability_gt_350 = max(theta) > 0.35;
for (n in 1 : N) {
avg_gt_400[n] = ((y[n] + z[n]) / (0.0 + K[n] + K_new[n])) > 0.400;
}
for (n in 1 : N) {
ability_gt_400[n] = theta[n] > 0.400;
}

{
array[N] int dsc;
dsc = sort_indices_desc(theta);
for (n in 1 : N) {
rnk[dsc[n]] = n;
}
}
for (n in 1 : N) {
is_best[n] = rnk[n] == 1;
}

for (n in 1 : N) {
y_rep[n] = binomial_rng(K[n], theta[n]);
}
#include gq-postpred.stan
#include gq-ranking.stan

// replications for simulated items
array[N] int<lower=0> y_pop_rep;
for (n in 1 : N) {
y_pop_rep[n] = binomial_rng(K[n],
beta_rng(phi * kappa, (1 - phi) * kappa));
}

min_y_rep = min(y_rep);
max_y_rep = max(y_rep);
mean_y_rep = mean(to_vector(y_rep));
sd_y_rep = sd(to_vector(y_rep));

p_min = min_y_rep >= min_y;
p_max = max_y_rep >= max_y;
p_mean = mean_y_rep >= mean_y;
p_sd = sd_y_rep >= sd_y;
}
16 changes: 16 additions & 0 deletions knitr/pool-binary-trials/include/data-blocks.stan
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/** observed outcome y for N items in K, K_new binary trials **/
data {
  int<lower=0> N;              // items
  array[N] int<lower=0> K;     // initial trials
  array[N] int<lower=0> y;     // initial successes

  array[N] int<lower=0> K_new; // new trials
  array[N] int<lower=0> y_new; // new successes
}
transformed data {
  // summary statistics of the observed successes; downstream
  // generated-quantities code compares replicated data against these
  real min_y;
  real max_y;
  real mean_y;
  real sd_y;
  min_y = min(y);
  max_y = max(y);
  mean_y = mean(to_vector(y));
  sd_y = sd(to_vector(y));
}
46 changes: 46 additions & 0 deletions knitr/pool-binary-trials/include/gq-postpred.stan
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/** include in generated quantities block of model with parameter
      vector<lower=0, upper=1>[N] theta;  // chance of success
    and data: N, K, y, K_new, y_new, min_y, max_y, mean_y, sd_y
*/

// posterior predictive log density of the remaining trials;
// the vectorized binomial_lpmf sums over all N items, replacing
// an explicit accumulation loop
real log_p_new = binomial_lpmf(y_new | K_new, theta);

// posterior predictions for the remaining trials
array[N] int<lower=0> z = binomial_rng(K_new, theta);

// indicator: some item has chance of success > 0.35
int<lower=0, upper=1> some_ability_gt_350 = max(theta) > 0.35;

// per-item indicator: chance of success of item n > 0.400
array[N] int<lower=0, upper=1> ability_gt_400;
for (n in 1 : N) {
  ability_gt_400[n] = theta[n] > 0.400;
}

// per-item indicator: overall success average of item n > 0.400
array[N] int<lower=0, upper=1> avg_gt_400;
for (n in 1 : N) {
  // y[n] observed successes, z[n] predicted successes in remaining trials;
  // the 0.0 term promotes the ratio to real-valued division
  avg_gt_400[n] = ((y[n] + z[n]) / (0.0 + K[n] + K_new[n])) > 0.400;
}

// replications for existing items
array[N] int<lower=0> y_rep = binomial_rng(K, theta);

// posterior predictive test statistics with p-value indicators;
// each p_* is 1 when the replicated statistic >= the observed one

// posterior predictive min replicated successes, test stat p_val
real<lower=0> min_y_rep = min(y_rep);
int<lower=0, upper=1> p_min = min_y_rep >= min_y;

// posterior predictive max replicated successes, test stat p_val
real<lower=0> max_y_rep = max(y_rep);
int<lower=0, upper=1> p_max = max_y_rep >= max_y;

// posterior predictive sample mean replicated successes, test stat p_val
real<lower=0> mean_y_rep = mean(to_vector(y_rep));
int<lower=0, upper=1> p_mean = mean_y_rep >= mean_y;

// posterior predictive sample std dev replicated successes, test stat p_val
real<lower=0> sd_y_rep = sd(to_vector(y_rep));
int<lower=0, upper=1> p_sd = sd_y_rep >= sd_y;
16 changes: 16 additions & 0 deletions knitr/pool-binary-trials/include/gq-ranking.stan
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/** include in generated quantities block of model with parameter
      vector<lower=0, upper=1>[N] theta;  // chance of success
    and data N (number of observations)
*/

// rnk[n] is the rank of item n (1 = highest chance of success)
array[N] int<lower=1, upper=N> rnk;
{
  // item indices ordered by descending theta
  array[N] int order_desc = sort_indices_desc(theta);
  for (pos in 1 : N) {
    rnk[order_desc[pos]] = pos;
  }
}

// indicator that item n has the highest chance of success
array[N] int<lower=0, upper=1> is_best;
for (n in 1 : N) {
  is_best[n] = (rnk[n] == 1);
}
Loading