verifier.py
from sklearn import metrics
import numpy as np


def verify(curr_model, test_x, test_y, h, g, alpha=0.001):
    """
    Decides whether the proposed pair (g, h) should be accepted into curr_model.
    Inputs:
        curr_model: current model object, exposing a row-wise .predict method
        test_x: holdout features to evaluate the proposal on (pandas DataFrame)
        test_y: holdout labels aligned with test_x (pandas Series)
        h: proposed model; called on the DataFrame of rows that belong to the group
        g: function from X -> {0, 1} which returns 1 if x is in the identified group and 0 else
        alpha: acceptance threshold on the group-weighted improvement
    Return: True if the group-weighted improvement of h over curr_model is at least alpha, else False.
    """
    # pull the x and y values that belong to g
    indices = test_x.apply(g, axis=1) == 1
    xs = test_x[indices]
    ys = test_y[indices]
    # get predicted ys from the current model and the proposed h
    curr_model_preds = xs.apply(curr_model.predict, axis=1)
    h_preds = h(xs)
    # measure the error of the current model and the proposed h on the group
    curr_model_error = metrics.zero_one_loss(ys, curr_model_preds)
    h_error = metrics.zero_one_loss(ys, h_preds)
    # accept (g, h) only if the improvement, weighted by the group's share of the data, clears alpha
    group_weight = sum(indices) / float(len(test_x))
    improvement = curr_model_error - h_error
    if group_weight * improvement >= alpha:
        return True
    return False
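

# Worked example of the acceptance rule above (illustrative numbers, not taken from
# the original project): if the group identified by g covers 10% of the holdout set
# and h lowers the group's zero-one loss from 0.30 to 0.20, the weighted improvement
# is 0.10 * (0.30 - 0.20) = 0.01, which clears the default alpha of 0.001, so
# verify would return True and the proposed (g, h) pair would be accepted.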


def is_proposed_group_good(curr_model, test_x, test_y, h, g):
    """
    Checks that the error of h on the group defined by g is not worse than the
    error of curr_model on that group. Does not weight by the size of the group.
    Inputs:
        curr_model: current model object, exposing a row-wise .predict method
        test_x: holdout features to evaluate the proposal on (pandas DataFrame)
        test_y: holdout labels aligned with test_x (pandas Series)
        h: proposed model; called on the DataFrame of rows that belong to the group
        g: function from X -> {0, 1} which returns 1 if x is in the identified group and 0 else
    Return: True if h has strictly lower group error than curr_model, else False.
    """
    # pull the x and y values that belong to g
    indices = test_x.apply(g, axis=1) == 1
    xs = test_x[indices]
    ys = test_y[indices]
    # get predicted ys from the current model and the proposed h
    curr_model_preds = xs.apply(curr_model.predict, axis=1)
    h_preds = h(xs)
    # measure the error of the current model and the proposed h on the group
    curr_model_error = metrics.zero_one_loss(ys, curr_model_preds)
    h_error = metrics.zero_one_loss(ys, h_preds)
    print("Error of current model on proposed group: %s" % curr_model_error)
    print("Error of h trained on proposed group: %s" % h_error)
    return h_error < curr_model_error
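

# Illustrative comparison with verify (assumed numbers, not from the original
# project): with the same 0.30 -> 0.20 drop in group error as the example above,
# is_proposed_group_good returns True because 0.20 < 0.30, no matter how small the
# group is; verify additionally multiplies that 0.10 improvement by the group's
# share of the holdout data before comparing against alpha.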


def is_proposed_group_good_csc(curr_model, test_x, test_y, h, g):
    """
    Same check as is_proposed_group_good, but for an h that expects 2-D array
    input one row at a time, so h is applied row by row after reshaping.
    Inputs:
        curr_model: current model object, exposing a row-wise .predict method
        test_x: holdout features to evaluate the proposal on (pandas DataFrame)
        test_y: holdout labels aligned with test_x (pandas Series)
        h: proposed model; called on 2-D arrays of shape (1, n_features)
        g: function from X -> {0, 1} which returns 1 if x is in the identified group and 0 else
    Return: True if h has strictly lower group error than curr_model, else False.
    """
    # pull the x and y values that belong to g
    indices = test_x.apply(g, axis=1) == 1
    xs = test_x[indices]
    ys = test_y[indices]
    # get predicted ys from the current model and the proposed h
    curr_model_preds = xs.apply(curr_model.predict, axis=1)

    # reshape each row to mesh with how h takes inputs
    def _h(x):
        _x = np.array(x).reshape(1, -1)
        return h(_x)[0]

    h_preds = xs.apply(_h, axis=1)
    # measure the error of the current model and the proposed h on the group
    curr_model_error = metrics.zero_one_loss(ys, curr_model_preds)
    h_error = metrics.zero_one_loss(ys, h_preds)
    print("Error of current model on proposed group: %s" % curr_model_error)
    print("Error of h trained on proposed group: %s" % h_error)
    return h_error < curr_model_error
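

# Illustrative note on the reshape inside is_proposed_group_good_csc (an assumed
# rationale, not stated in the original source): sklearn-style predictors reject
# 1-D input, so each pandas row is lifted to shape (1, n_features) before calling h:
#
#   row = xs.iloc[0]                    # 1-D Series of length n_features
#   _x = np.array(row).reshape(1, -1)   # 2-D array of shape (1, n_features)
#   pred = h(_x)[0]                     # single prediction for that row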


def check_group_sizes(test_x, group):
    # Returns True if the group has at least one element in test_x.
    indices = test_x.apply(group, axis=1) == 1
    return sum(indices) >= 1
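

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original module): exercises the checks
# above on toy pandas data. ConstantModel, the toy DataFrame, and the g/h
# functions below are hypothetical stand-ins for the project's real model
# objects and bounty submissions.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import pandas as pd

    class ConstantModel:
        """Toy model exposing the row-wise .predict interface used above."""

        def predict(self, row):
            return 0

    test_x = pd.DataFrame({"age": [25, 40, 60, 35], "income": [30, 80, 50, 20]})
    test_y = pd.Series([0, 1, 1, 0])

    def g(row):
        # Hypothetical group indicator: rows aged 35 or older.
        return 1 if row["age"] >= 35 else 0

    def h(xs):
        # Hypothetical proposed model for the group: predict 1 when income exceeds 40.
        return (xs["income"] > 40).astype(int).values

    print("group non-empty:", check_group_sizes(test_x, g))
    print("h improves on the group:",
          is_proposed_group_good(ConstantModel(), test_x, test_y, h, g))
    print("(g, h) accepted by verify:",
          verify(ConstantModel(), test_x, test_y, h, g, alpha=0.001))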