diff --git a/GroupTesting.egg-info/PKG-INFO b/GroupTesting.egg-info/PKG-INFO
new file mode 100644
index 0000000..c9c3c7f
--- /dev/null
+++ b/GroupTesting.egg-info/PKG-INFO
@@ -0,0 +1,10 @@
+Metadata-Version: 1.0
+Name: GroupTesting
+Version: 0.1.0
+Summary: Group testing for SARS-CoV-2 in large populations.
+Home-page: https://github.com/WGS-TB/GroupTesting
+Author: Hooman Zabeti, Nick Dexter
+Author-email: UNKNOWN
+License: TBD
+Description: UNKNOWN
+Platform: UNKNOWN
diff --git a/GroupTesting.egg-info/SOURCES.txt b/GroupTesting.egg-info/SOURCES.txt
new file mode 100644
index 0000000..d423847
--- /dev/null
+++ b/GroupTesting.egg-info/SOURCES.txt
@@ -0,0 +1,17 @@
+MANIFEST.in
+README.md
+setup.py
+GroupTesting.egg-info/PKG-INFO
+GroupTesting.egg-info/SOURCES.txt
+GroupTesting.egg-info/dependency_links.txt
+GroupTesting.egg-info/entry_points.txt
+GroupTesting.egg-info/requires.txt
+GroupTesting.egg-info/top_level.txt
+group_testing/__init__.py
+group_testing/generate_groups.py
+group_testing/generate_individual_status.py
+group_testing/generate_test_results.py
+group_testing/group_testing.py
+group_testing/group_testing_decoder.py
+group_testing/group_testing_evaluation.py
+group_testing/utils.py
\ No newline at end of file
diff --git a/GroupTesting.egg-info/dependency_links.txt b/GroupTesting.egg-info/dependency_links.txt
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/GroupTesting.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/GroupTesting.egg-info/entry_points.txt b/GroupTesting.egg-info/entry_points.txt
new file mode 100644
index 0000000..42632ad
--- /dev/null
+++ b/GroupTesting.egg-info/entry_points.txt
@@ -0,0 +1,4 @@
+
+[console_scripts]
+GroupTesting = group_testing.group_testing:main
+
\ No newline at end of file
diff --git a/GroupTesting.egg-info/requires.txt b/GroupTesting.egg-info/requires.txt
new file mode 100644
index 0000000..86cdf9b
--- /dev/null
+++ b/GroupTesting.egg-info/requires.txt
@@ -0,0 +1,9 @@
+cplex
+numpy
+pandas
+pulp
+pymprog
+python-igraph
+pyyaml
+scipy
+sklearn
diff --git a/GroupTesting.egg-info/top_level.txt b/GroupTesting.egg-info/top_level.txt
new file mode 100644
index 0000000..bb06418
--- /dev/null
+++ b/GroupTesting.egg-info/top_level.txt
@@ -0,0 +1 @@
+group_testing
diff --git a/build/lib/group_testing/__init__.py b/build/lib/group_testing/__init__.py
new file mode 100644
index 0000000..7ca9c90
--- /dev/null
+++ b/build/lib/group_testing/__init__.py
@@ -0,0 +1,2 @@
+_program = "GroupTesting"
+__version__ = "0.1.0"
\ No newline at end of file
diff --git a/build/lib/group_testing/generate_groups.py b/build/lib/group_testing/generate_groups.py
new file mode 100644
index 0000000..e699fc4
--- /dev/null
+++ b/build/lib/group_testing/generate_groups.py
@@ -0,0 +1,261 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Jun 25 13:45:50 2020
+
+@author: ndexter
+"""
+
+# import standard libraries
+import sys, math
+from os import path
+import random
+import numpy as np
+
+# np.set_printoptions(threshold=np.inf)
+np.set_printoptions(edgeitems=60, linewidth=100000,
+                    formatter=dict(float=lambda x: "%.3g" % x))
+import igraph
+import scipy.io as sio
+
+# import hdf5storage
+# import matplotlib
+# atplotlib.use('Agg')
+# import unittest
+
+# import plotting and data-manipulation tools
+# import matplotlib.pyplot as plt
+
+"""
+Unit testing to be added later
+"""
+# class TestMeasurementMatrix(unittest.TestCase):
+#
+#     def test_make_graph(self):
+#         gen_measurement_matrix(opts)
+
+
+"""
+Function to generate and
return the matrix +""" + + +def gen_measurement_matrix(seed=0, N=1000, m=100, group_size=4, max_tests_per_individual=4, verbose=False, + graph_gen_method='no_multiple', plotting=False, saving=True, run_ID='debugging'): + # set the seed used for graph generation to the options seed + random.seed(seed) + np_random = np.random.RandomState(seed) + # compute tests per individual needed to satisfy + # N*tests_per_individual == m*group_size + avg_test_split = math.floor(m * group_size / N) + if verbose: + print('number of tests per individual needed to satisfy ' \ + + '(N*tests_per_individual == m*group_size) = ' + str(avg_test_split)) + + # verify the given parameters can lead to a valid testing scheme: + # if floor(m*group_size/N) > max_tests_per_individual, we would need to + # test each individual more times than the maximum allowed to satisfy + # the group size constraint + try: + # ensure that we're not violating the maximum + assert avg_test_split <= max_tests_per_individual + except AssertionError: + errstr = ('Assertion Failed: With group_size = ' + str(group_size) + ', since m = ' + str(m) + + ' and N = ' + str(N) + ' we have floor(m*group_size/N) = ' + str(avg_test_split) + + 'which exceeds max_tests_per_individual = ' + str(max_tests_per_individual)) + print(errstr) + sys.exit() + + # compute the actual tests per individual + # note: we may end up with individuals being tested less than the maximum + # allowed number of tests, but this is not a problem (only the opposite is) + tests_per_individual = max(min(max_tests_per_individual, avg_test_split), 1) + + if verbose: + print("tests_per_individual = " + str(tests_per_individual)) + + # In this model, the first N elements of the degree sequence correspond + # to the population, while the last m elements correspond to the groups + + # in-degree corresponds to the individuals, here we set the first N + # entries of the in-degree sequence to specify the individuals + indeg = np.zeros(N + m) + indeg[0:N] = tests_per_individual + + # out-degree corresponds to the group tests, here we set the last m + # entries of the out-degree sequence to specify the groups + outdeg = np.zeros(N + m) + outdeg[N:(N + m)] = group_size + + # keep track of vertex types for final graph vertex coloring + vtypes = np.zeros(N + m) + vtypes[0:N] = 1 + + # output the sum of indeg and outdeg if checking conditions + if verbose: + print("out degree sequence: {}".format(outdeg.tolist())) + print("in degree sequence: {}".format(indeg.tolist())) + print("sum outdeg (groups) = {}".format(np.sum(outdeg))) + print("sum indeg (individ) = {}".format(np.sum(indeg))) + + # check if we have too many individuals being tested + # if np.sum(indeg) > np.sum(outdeg): + while (np.sum(indeg) > np.sum(outdeg)): + nz_indeg = indeg[(indeg > 0)] + max_indices = np.array(np.where(indeg == nz_indeg.max())) + index = np_random.randint(max_indices.shape[1], size=1) + indeg[max_indices[0, index]] = indeg[max_indices[0, index]] - 1 + + # check if the number of tests per individual is less than the max, and, + # if we can, fix it + if tests_per_individual < max_tests_per_individual: + if (np.sum(indeg) < np.sum(outdeg)): + while (np.sum(indeg) < np.sum(outdeg)): + # select index randomly (bad: might cause largest index to be updated, + # possibly exceed max_tests_per_individual) + # index = np.random.randint(0,N) + + # select index corresponding to one of the minimum indices + nz_indeg = indeg[(indeg > 0)] + min_indices = np.array(np.where(indeg == nz_indeg.min())) + # min_indices = indeg[(indeg 
== nz_indeg.min())]# & (indeg > 0)] + # min_indices = np.array(np.where((indeg == indeg.min()) & (indeg > 0))) + # print(min_indices) + # print(min_indices.shape[1]) + index = np_random.randint(min_indices.shape[1], size=1) + # print('generated index = ' + str(index)) + indeg[min_indices[0, index]] = indeg[min_indices[0, index]] + 1 + + # output stats after fixing + if verbose: + print("after fixing") + print("out degree sequence: {}".format(outdeg.tolist())) + print("in degree sequence: {}".format(indeg.tolist())) + print("sum outdeg (groups) = {}".format(np.sum(outdeg))) + print("sum indeg (individ) = {}".format(np.sum(indeg))) + + else: + try: + assert np.sum(outdeg) == np.sum(indeg) + except AssertionError: + errstr = ("Assertion Failed: Require sum(outdeg) = " + str(np.sum(outdeg)) + " == " \ + + str(np.sum(indeg)) + " = sum(indeg)") + print(errstr) + print("out degree sequence: {}".format(outdeg.tolist())) + print("in degree sequence: {}".format(indeg.tolist())) + # sys.exit() + + # generate the graph + try: + assert igraph._igraph.is_graphical_degree_sequence(outdeg.tolist(), indeg.tolist()) + g = igraph.Graph.Degree_Sequence(outdeg.tolist(), indeg.tolist(), graph_gen_method) + g.vs['vertex_type'] = vtypes + assert np.sum(outdeg) == len(g.get_edgelist()) + except AssertionError: + errstr = ("Assertion Failed: Require [sum(outdeg) = " + str(np.sum(outdeg)) + "] == [" \ + + str(np.sum(indeg)) + " = sum(indeg)] == [ |E(G)| = " + str(len(g.get_edgelist())) + "]") + except igraph._igraph.InternalError as err: + print("igraph InternalError (likely invalid outdeg or indeg sequence): {0}".format(err)) + print("out degree sequence: {}".format(outdeg.tolist())) + print("in degree sequence: {}".format(indeg.tolist())) + sys.exit() + except: + print("Unexpected error:", sys.exec_info()[0]) + sys.exit() + else: + # get the adjacency matrix corresponding to the nodes of the graph + A = np.array(g.get_adjacency()._get_data()) + if verbose: + # print(g) + # print(A) + # print("before resizing") + # print(A.shape) + print("row sum {}".format(np.sum(A, axis=1))) + print("column sum {}".format(np.sum(A, axis=0))) + + # the generated matrix has nonzeros in bottom left with zeros + # everywhere else, resize to it's m x N + A = A[N:m + N, 0:N] + # A = np.minimum(A, 1) + + # check if the graph corresponds to a bipartite graph + check_bipartite = g.is_bipartite() + + # save the row and column sums + row_sum = np.sum(A, axis=1) + col_sum = np.sum(A, axis=0) + + # display properties of A and graph g + if verbose: + # print(A) + # print("after resizing") + # print(A.shape) + print("row sum {}".format(row_sum)) + print("column sum {}".format(col_sum)) + print("max row sum {}".format(max(row_sum))) + print("max column sum {}".format(max(col_sum))) + print("min row sum {}".format(min(row_sum))) + print("min column sum {}".format(min(col_sum))) + print("g is bipartite: {}".format(check_bipartite)) + + # set options and plot corresponding graph + if plotting: + layout = g.layout("auto") + color_dict = {1: "blue", 0: "red"} + g.vs['color'] = [color_dict[vertex_type] for vertex_type in g.vs['vertex_type']] + B = g.vs.select(vertex_type='B') + C = g.vs.select(vertex_type='C') + visual_style = {} + visual_style['vertex_size'] = 10 + visual_style['layout'] = layout + visual_style['edge_width'] = 0.5 + visual_style['edge_arrow_width'] = 0.2 + visual_style['bbox'] = (1200, 1200) + igraph.drawing.plot(g, **visual_style) + + # save data to a MATLAB ".mat" file + data_filename = run_ID + '_generate_groups_output.mat' + # if 
saving: + # data = {} + # # data['A'] = A + # data['bipartite'] = check_bipartite + # data['indeg'] = indeg + # data['outdeg'] = outdeg + # data['min_col_sum'] = min(col_sum) + # data['min_row_sum'] = min(row_sum) + # data['max_col_sum'] = max(col_sum) + # data['max_row_sum'] = max(row_sum) + # data['opts'] = opts + # sio.savemat(opts['data_filename'], data) + + # return the adjacency matrix of the graph + return A + + +# main method for testing +if __name__ == '__main__': + + # print igraph version + print("Loaded igraph version {}".format(igraph.__version__)) + + # options for plotting, verbose output, saving, seed + opts = {} + opts['m'] = 100 + opts['N'] = 500 + opts['group_size'] = 30 + opts['max_tests_per_individual'] = 15 + opts['graph_gen_method'] = 'no_multiple' # options are "no_multiple" or "simple" + opts['verbose'] = True # False + opts['plotting'] = False # False + opts['saving'] = True + opts['run_ID'] = 'GT_matrix_generation_component' + #opts['data_filename'] = opts['run_ID'] + '_generate_groups_output.mat' + opts['seed'] = 0 + + # generate the measurement matrix with igraph + A = gen_measurement_matrix(**opts) + + # print shape of matrix + if opts['verbose']: + print("Generated adjacency matrix of size:") + print(A.shape) diff --git a/build/lib/group_testing/generate_individual_status.py b/build/lib/group_testing/generate_individual_status.py new file mode 100644 index 0000000..4f8827d --- /dev/null +++ b/build/lib/group_testing/generate_individual_status.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Thu Jun 25 13:45:50 2020 + +@author: ndexter +""" + +import sys, math +from os import path +import random +import numpy as np +#np.set_printoptions(threshold=np.inf) +np.set_printoptions(edgeitems=60, linewidth=100000, + formatter=dict(float=lambda x: "%.3g" % x)) +import scipy.io as sio + +""" +Function to generate the infected status of individuals (a vector) +""" +def gen_status_vector(seed=0, N=1000, s=10, verbose=False): + + # set the seed used for status generation + local_random = random.Random() + local_random.seed(seed) + np.random.seed(seed) + + # generate a random vector having sparsity level s + indices = local_random.sample(range(N), s) + u = np.zeros((N, 1)) + for i in indices: + u[i] = 1 + + try: + assert np.sum(u) == s + except AssertionError: + errstr = ("Assertion Failed: opts['s'] = " + str(s) \ + + ", since sum(u) = " + str(np.sum(u))) + print(errstr) + sys.exit() + + return u + +if __name__ == '__main__': + + # options for plotting, verbose output, saving, seed + opts = {} + opts['N'] = 500 + opts['s'] = 20 + opts['verbose'] = True #False + # opts['plotting'] = True #False + # opts['saving'] = True + # opts['run_ID'] = 'GT_status_vector_generation_component' + # opts['data_filename'] = opts['run_ID'] + '_generate_groups_output.mat' + opts['seed'] = 0 + + u = gen_status_vector(**opts) + + # print shape of matrix + if opts['verbose']: + print("Generated status vector of size:") + print(u.shape) diff --git a/build/lib/group_testing/generate_test_results.py b/build/lib/group_testing/generate_test_results.py new file mode 100644 index 0000000..852ea68 --- /dev/null +++ b/build/lib/group_testing/generate_test_results.py @@ -0,0 +1,239 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Thu Jun 25 13:45:50 2020 + +@author: ndexter +""" + +import sys, math +from os import path +import random +import numpy as np + +from group_testing.generate_groups import gen_measurement_matrix +from 
group_testing.generate_individual_status import gen_status_vector +import group_testing.utils as utils + +# np.set_printoptions(threshold=np.inf) +np.set_printoptions(edgeitems=60, linewidth=100000, + formatter=dict(float=lambda x: "%.3g" % x)) +import scipy.io as sio + +""" +Function to generate the results of the tests from the measurement matrix A +and the status vector u +""" + + +def gen_test_vector(A, u, seed=0, m=100, verbose=False, test_noise_methods=[], binary_symmetric_noise_prob=0.26, + theta_l=0, theta_u=0.0625, permutation_noise_prob=0.01): + # set the seed used for test result noise + random.seed(seed) + np_random = np.random.RandomState(seed) + + # generate the tests directly from A and u + b = np.matmul(A, u) + + if verbose: + print('before minimum:') + print(b) + + # rescale test results to 1 + b_temp = np.minimum(b, 1) + + if verbose: + print('after minimum') + print(np.c_[b, b_temp]) + + # replace the original vector with the noisy vector + b = np.array(b_temp) + + for method in test_noise_methods: + + if method == 'binary_symmetric': + + rho = binary_symmetric_noise_prob + + indices = np.arange(m) + + weight_vec = np.ones(m) + weight_vec = weight_vec / np.sum(weight_vec) + + num_flip = math.ceil(m * rho) + + vec = np_random.choice(indices, size=num_flip, replace=False, p=weight_vec) + + # copy b into a new vector for adding noise + b_noisy = np.array(b) + + if verbose: + print('before flipping') + print(b_noisy) + + # flip the resulting entries + for v in vec: + b_noisy[v] = (b_noisy[v] + 1) % 2 + + if verbose: + print('weight vector') + print(weight_vec) + print('indices to flip') + print(vec) + print('after flipping - left: b, right: b_noisy') + print(np.c_[b, b_noisy]) + print('number of affected tests') + print(np.sum(abs(b - b_noisy))) + print('expected number') + print(math.ceil(binary_symmetric_noise_prob * m)) + + # replace the original vector with the noisy vector + b = np.array(b_noisy) + + elif method == 'threshold': + + # find the group sizes + Asum = np.sum(A, axis=1) + + print('sum is ' + str(Asum.shape)) + + # copy b into a new vector for adding noise + b_noisy = np.array(b) + num_of_infected = np.matmul(A, u) + if verbose: + print('before threshold noise') + print(b_noisy) + + # apply the threshold noise to b_noisy + for i in range(m): + + if b_noisy[i] == 1: + if num_of_infected[i] / Asum[i] >= theta_u: + b_noisy[i] = 1 + elif num_of_infected[i] / Asum[i] <= theta_l: + b_noisy[i] = 0 + elif num_of_infected[i] / Asum[i] > theta_l and num_of_infected[i] / Asum[i] < theta_u: + # probability 1/2 of 0 or 1 + # b_noisy[i] = np.random.randint(2) + + # instead use probability of false negatives = 1/10 + b_noisy[i] = np_random.choice(np.arange(2), p=[0.1, 0.9]) + + if verbose: + print('after threshold noise - left: b, right: b_noisy') + print(np.c_[b, b_noisy]) + + # replace the original vector with the noisy vector + b = np.array(b_noisy) + + elif method == 'permutation': + # percentage of indices to permute + rho = permutation_noise_prob + + # get the indices from which to select a subset to permute + indices = np.arange(m) + + # permute all indices with equal probability + weight_vec = np.ones(m) + weight_vec = weight_vec / np.sum(weight_vec) + + # determine the number of indices that need to be permuted + num_permute = math.ceil(m * rho) + + # choose the indices to permute + try: + assert 2 * num_permute <= m + except AssertionError as e: + print('number of permuted items exceeds m, incorrect range for rho?') + sys.exit() + else: + vec = 
np_random.choice(indices, size=2 * num_permute, replace=False, p=weight_vec) + + # copy b into a new vector for adding noise + b_noisy = np.array(b) + + if verbose: + print('before permuting') + print(b_noisy) + + # find a permutation of the randomly selected indices + # permuted_vec = np.random.permutation(vec) + + # print(vec) + + for i in range(num_permute): + b_noisy[vec[i]] = b[vec[i + num_permute]] + b_noisy[vec[i + num_permute]] = b[vec[i]] + # print(np.c_[indices, b, b_noisy]) + + # if verbose: + # print('vector of indices to permute') + # print(vec) + # print('permutation of randomly selected indices') + # print(permuted_vec) + # print('original vector on indices to permute') + # print(b_noisy[vec]) + # print('resulting permuted test results associated with those indices') + # print(b_noisy[permuted_vec]) + + # permute the original test results to add noise + # b_noisy[vec] = b_noisy[permuted_vec] + + if verbose: + print('after permuting - left: b, right: b_noisy') + print(np.c_[b, b_noisy]) + + # replace the original vector with the noisy vector + b = np.array(b_noisy) + return b + + +if __name__ == '__main__': + + # options for plotting, verbose output, saving, seed + opts = {} + opts['m'] = 20 + opts['N'] = 400 + opts['s'] = math.ceil(0.04 * opts['N']) + opts['run_ID'] = 'GT_test_result_vector_generation_component' + opts['data_filename'] = opts['run_ID'] + '_generate_groups_output.mat' + + # noise types to test + + # opts['test_noise_methods'] = ['permutation'] + opts['test_noise_methods'] = [] + + for method in opts['test_noise_methods']: + print('adding ' + method + ' noise', end=' ') + if method == 'threshold': + opts['theta_l'] = 0.02 + opts['theta_u'] = 0.10 + print('with theta_l = ' + str(opts['theta_l']) + ' and theta_u = ' + str(opts['theta_u'])) + elif method == 'binary_symmetric': + opts['binary_symmetric_noise_prob'] = 0.26 + print('with binary_symmetric_noise_probability = ' + str(opts['binary_symmetric_noise_prob'])) + elif method == 'permutation': + opts['permutation_noise_prob'] = 0.15 + print('with permutation_noise_probability = ' + str(opts['permutation_noise_prob'])) + + opts['seed'] = 0 + opts['group_size'] = 30 + opts['max_tests_per_individual'] = 15 + opts['graph_gen_method'] = 'no_multiple' + opts['verbose'] = False + opts['plotting'] = False + opts['saving'] = False + passing_param, _ = utils.param_distributor(opts, gen_measurement_matrix) + A = gen_measurement_matrix(**passing_param) + passing_param, _ = utils.param_distributor(opts, gen_status_vector) + u = gen_status_vector(**passing_param) + + opts['verbose'] = True + opts['plotting'] = False + opts['saving'] = True + b = gen_test_vector(A, u, opts) + + # print shape of matrix + if opts['verbose']: + print("Generated test result vector of size:") + print(b.shape) diff --git a/build/lib/group_testing/group_testing.py b/build/lib/group_testing/group_testing.py new file mode 100644 index 0000000..c4a3559 --- /dev/null +++ b/build/lib/group_testing/group_testing.py @@ -0,0 +1,238 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +@author: hzabeti +""" + +import argparse +from multiprocessing import Pool +from multiprocessing import cpu_count +import numpy as np +import pandas as pd +import itertools +import os +import datetime +import time +import yaml +from shutil import copyfile +from sklearn.model_selection import GridSearchCV +from sklearn.metrics import make_scorer +from sklearn.model_selection import StratifiedKFold +from sklearn.metrics import roc_curve, auc, balanced_accuracy_score +from 
sklearn.metrics import accuracy_score +import sys + +from group_testing import __version__ +from group_testing.generate_groups import gen_measurement_matrix +from group_testing.generate_individual_status import gen_status_vector +from group_testing.generate_test_results import gen_test_vector +from group_testing.group_testing_evaluation import decoder_evaluation +from group_testing.group_testing_decoder import GroupTestingDecoder +import group_testing.utils as utils + + +def multi_process_group_testing(design_param, decoder_param): + try: + single_run_start = time.time() + # generate the measurement matrix from the given options + if 'group_size' in design_param.keys() and str(design_param['group_size']).lower()=='auto': + assert 'N' in design_param.keys(), "To generate the group size automatically parameter 'N' is needed to be" \ + "defined in the config file" + assert 's' in design_param.keys(), "To generate the group size automatically parameter 's' is needed to be" \ + "defined in the config file" + assert design_param['s'] <= design_param['N'], " 's'> 'N': number of infected individuals can not be " \ + "greater than number of individuals." + design_param['group_size'] = utils.auto_group_size(design_param['N'],design_param['s']) + print("group size is {}".format(design_param['group_size'])) + if design_param['generate_groups'] == 'alternative_module': + generate_groups_alt_module = __import__(design_param['groups_alternative_module'][0], globals(), locals(), + [], 0) + generate_groups_alt_function = getattr(generate_groups_alt_module, + design_param['groups_alternative_module'][1]) + passing_param, remaining_param = utils.param_distributor(design_param, generate_groups_alt_function) + A = generate_groups_alt_function(**passing_param) + elif design_param['generate_groups'] == 'input': + A = np.genfromtxt(design_param['groups_input'], delimiter=',') + # TODO: Check if m and N are defined too + assert np.array_equal(A, A.astype(bool)), "The input design matrix A is not binary!" + design_param['m'], design_param['N'] = A.shape + design_param['group_size'] = int(max(A.sum(axis=1))) + design_param['max_tests_per_individual'] = int(max(A.sum(axis=0))) + elif design_param['generate_groups'] == 'generate': + passing_param, remaining_param = utils.param_distributor(design_param, gen_measurement_matrix) + A = gen_measurement_matrix(**passing_param) + # generate the infected status of the individuals + if design_param['generate_individual_status'] == 'input': + u = np.genfromtxt(design_param['individual_status_input'], delimiter=',') + assert u.size == design_param['N'], "Individual status input file does not have the correct size!" + assert np.array_equal(u, u.astype(bool)), "Individual status input file is not binary!" 
+ design_param['s'] = np.count_nonzero(u) + elif design_param['generate_individual_status'] == 'alternative_module': + individual_status_alt_module = __import__(design_param['individual_status_alternative_module'][0], + globals(), locals(), [], 0) + individual_status_alt_function = getattr(individual_status_alt_module, + design_param['individual_status_alternative_module'][1]) + passing_param, temp_remaining_param = utils.param_distributor(design_param, individual_status_alt_function) + remaining_param.update(temp_remaining_param) + u = individual_status_alt_function(**passing_param) + elif design_param['generate_individual_status'] == 'generate': + passing_param, temp_remaining_param = utils.param_distributor(design_param, gen_status_vector) + remaining_param.update(temp_remaining_param) + u = gen_status_vector(**passing_param) + u = [i[0] for i in u] + # generate the data corresponding to the group tests + if design_param['generate_test_results'] == 'input': + b = np.genfromtxt(design_param['test_results_input'], delimiter=',') + assert b.size == design_param['m'], "Test results input file does not have the correct size!" + assert np.array_equal(b, b.astype(bool)), "test results input file is not binary!" + elif design_param['generate_test_results'] == 'alternative_module': + test_results_alt_module = __import__(design_param['test_results_alternative_module'][0], + globals(), locals(), [], 0) + test_results_alt_function = getattr(test_results_alt_module, + design_param['test_results_alternative_module'][1]) + passing_param, temp_remaining_param = utils.param_distributor(design_param, test_results_alt_function) + remaining_param.update(temp_remaining_param) + b = test_results_alt_function(**passing_param) + elif design_param['generate_test_results'] == 'generate': + passing_param, temp_remaining_param = utils.param_distributor(design_param, gen_test_vector) + remaining_param.update(temp_remaining_param) + b = gen_test_vector(A, u,**passing_param) + for main_param in ['N', 'm', 's', 'group_size', 'seed']: + if main_param not in design_param: + if main_param not in remaining_param: + design_param[main_param]= 'N\A' + else: + design_param[main_param]=remaining_param[main_param] + if 'save_to_file' in design_param.keys() and design_param['save_to_file']: + design_path = utils.inner_path_generator(design_param['result_path'], 'Design') + design_matrix_path = utils.inner_path_generator(design_path, 'Design_Matrix') + pd.DataFrame(A).to_csv(utils.report_file_path(design_matrix_path, 'design_matrix', 'csv', design_param), + header=None, index=None) + if design_param['generate_individual_status']: + individual_status_path = utils.inner_path_generator(design_path,'Individual_Status') + pd.DataFrame(u).to_csv(utils.report_file_path(individual_status_path, 'individual_status','csv', design_param), + header=None, index=None) + if design_param['generate_test_results']: + test_results_path = utils.inner_path_generator(design_path,'Test_Results') + pd.DataFrame(b).to_csv(utils.report_file_path(test_results_path, 'test_results','csv', design_param), + header=None, index=None) + except: + e = sys.exc_info() + print("Error:", e) + decoder_param['decoding'] = False + + if decoder_param['decoding']: + try: + if decoder_param['decoder'] == 'generate': + # TODO: this is only for cplex! Change it to more general form! 
+ log_path = utils.inner_path_generator(design_param['result_path'], 'Logs') + decoder_param['solver_options']['logPath'] = utils.report_file_path(log_path, 'log','txt', design_param) + passing_param,_ = utils.param_distributor(decoder_param,GroupTestingDecoder) + c = GroupTestingDecoder(**passing_param) + single_fit_start = time.time() + if decoder_param['lambda_selection']: + scoring = dict(Accuracy='accuracy', + balanced_accuracy=make_scorer(balanced_accuracy_score)) + print('cross validation') + grid = GridSearchCV(estimator=c, param_grid=decoder_param['cv_param'], + cv=decoder_param['number_of_folds'], + refit=decoder_param['eval_metric'], scoring=scoring, n_jobs=-1, + return_train_score=True, verbose=10) + grid.fit(A, b) + + c = grid.best_estimator_ + pd.DataFrame.from_dict(grid.cv_results_).to_csv( + utils.report_file_path(log_path, + 'cv_results', 'csv', design_param) + ) + pd.DataFrame(grid.best_params_, index=[0]).to_csv( + utils.report_file_path(log_path, + 'best_param', 'csv', design_param) + ) + else: + c.fit(A, b) + single_fit_end = time.time() + # print('SUM', np.sum(A, axis=0)) + # print('Score:', c.score(A, b)) + elif decoder_param['decoder'] == 'alternative_module': + # TODO: CV for alternative module. Is it needed? + single_fit_start = time.time() + decoder_alt_module = __import__(decoder_param['decoder_alternative_module'][0], + globals(), locals(), [], 0) + decoder_alt_function = getattr(decoder_alt_module, + decoder_param['decoder_alternative_module'][1]) + passing_param, _ = utils.param_distributor(decoder_param, decoder_alt_function) + c = decoder_alt_function(**passing_param) + c.fit(A, b) + single_fit_end = time.time() + if 'save_to_file' in design_param.keys() and design_param['save_to_file']: + solution_path = utils.inner_path_generator(design_param['result_path'], 'Solutions') + pd.DataFrame(c.solution()).to_csv(utils.report_file_path(solution_path, 'solution','csv', design_param), + header=None, index=None) + # evaluate the accuracy of the solution + if decoder_param['evaluation']: + try: + ev_result = decoder_evaluation(u, c.solution(), decoder_param['eval_metric']) + ev_result['solver_time'] = round(single_fit_end - single_fit_start, 2) + # TODO: this is only for cplex status! Change it to more general form! 
+ ev_result['Status'] = c.prob_.cplex_status + print('Evaluation is DONE!') + except Exception as e: + print(e) + ev_result = {'tn': None, 'fp': None, 'fn': None, 'tp': None} + single_run_end = time.time() + ev_result['time'] = round(single_run_end - single_run_start, 2) + ev_result.update({key: design_param[key] for key in ['N', 'm', 's', 'group_size', 'seed', + 'max_tests_per_individual']}) + return ev_result + except Exception as e: + print(e) + else: + print("Decoding was not performed!") + + + +# main method for testing +def main(sysargs=sys.argv[1:]): + start_time = time.time() + + # argparse + parser = argparse.ArgumentParser(prog='GroupTesting', description='Description') + required_args= parser.add_argument_group('required arguments') + parser.add_argument( + '--version', action='version', + version="%(prog)s version {version}".format(version=__version__) + ) + required_args.add_argument( + '--config', dest='config', metavar='FILE', + help='Path to the config.yml file', required=True, + ) + parser.add_argument( + '--output-dir', dest='output_path', metavar='DIR', + help='Path to the output directory', + ) + args = parser.parse_args() + + # Read config file + design_param, decoder_param = utils.config_reader(args.config) + # output files path + current_path, result_path = utils.result_path_generator(args) + for d in design_param: + d['result_path'] = result_path + if not any([d['lambda_selection'] if 'lambda_selection' in d.keys() else False for d in decoder_param]): + with Pool(processes=cpu_count()) as pool: + results = pool.starmap(multi_process_group_testing, itertools.product(design_param, decoder_param)) + pool.close() + pool.join() + else: + results = [multi_process_group_testing(i[0], i[1]) for i in itertools.product(design_param, decoder_param)] + + # Saving files + pd.DataFrame(design_param).to_csv(os.path.join(result_path, 'opts.csv')) + if all(v is not None for v in results): + column_names = ['N', 'm', 's', 'group_size', 'seed', 'max_tests_per_individual', 'tn', 'fp', 'fn', 'tp', + decoder_param[0]['eval_metric'], 'Status', 'solver_time', 'time'] + pd.DataFrame(results).reindex(columns=column_names).to_csv(os.path.join(result_path, 'ConfusionMatrix.csv')) + + end_time = time.time() + print(end_time - start_time) \ No newline at end of file diff --git a/build/lib/group_testing/group_testing_decoder.py b/build/lib/group_testing/group_testing_decoder.py new file mode 100644 index 0000000..ca5d3b6 --- /dev/null +++ b/build/lib/group_testing/group_testing_decoder.py @@ -0,0 +1,215 @@ +import pulp as pl +import os +import numpy as np +from sklearn.base import BaseEstimator, ClassifierMixin +from sklearn.metrics import balanced_accuracy_score + +from group_testing.generate_test_results import gen_test_vector +import group_testing.utils as utils + + +class GroupTestingDecoder(BaseEstimator, ClassifierMixin): + def __init__(self, lambda_w=1, lambda_p=1, lambda_n=1, lambda_e=None, defective_num_lower_bound=None, + sensitivity_threshold=None, specificity_threshold=None, lp_relaxation=False, lp_rounding_threshold=0, + is_it_noiseless=True, solver_name=None, solver_options=None): + # TODO: Check their values + # TODO: Change lambda_w to sample weight + self.lambda_e = lambda_e + self.lambda_p = lambda_p + self.lambda_n = lambda_n + + # ----------------------------------------- + # lambda_w is added as a coefficient for vector w. lambda_w could be used as a vector of prior probabilities. + # lambda_w default value is 1. 
+ try: + assert isinstance(lambda_w, (int, float, list)) + self.lambda_w = lambda_w + except AssertionError: + print("lambda_w should be either int, float or list of numbers") + # ----------------------------------------- + self.defective_num_lower_bound = defective_num_lower_bound + self.sensitivity_threshold = sensitivity_threshold + self.specificity_threshold = specificity_threshold + self.lp_relaxation = lp_relaxation + self.lp_rounding_threshold = lp_rounding_threshold + self.is_it_noiseless = is_it_noiseless + self.solver_name = solver_name + self.solver_options = solver_options + self.prob_ = None + self.ep_cat = 'Binary' + self.en_cat = 'Binary' + self.en_upBound = 1 + + def fit(self, A, label): + if self.lambda_e is not None: + # Use lambda_e if both lambda_p and lambda_n have same value + self.lambda_p = self.lambda_e + self.lambda_n = self.lambda_e + m, n = A.shape + alpha = A.sum(axis=1) + label = np.array(label) + positive_label = np.where(label == 1)[0] + negative_label = np.where(label == 0)[0] + # positive_label = [idx for idx,i in enumerate(label) if i==1] + # negative_label = [idx for idx,i in enumerate(label) if i==0] + # ------------------------------------- + # Checking length of lambda_w + try: + if isinstance(self.lambda_w, list): + assert len(self.lambda_w) == n + except AssertionError: + print("length of lambda_w should be equal to number of individuals( numbers of columns in the group " + "testing matrix)") + # ------------------------------------- + # Initializing the ILP problem + p = pl.LpProblem('GroupTesting', pl.LpMinimize) + # p.verbose(param['verbose']) + # Variables kind + if self.lp_relaxation: + varCategory = 'Continuous' + else: + varCategory = 'Binary' + # Variable w + w = pl.LpVariable.dicts('w', range(n), lowBound=0, upBound=1, cat=varCategory) + # -------------------------------------- + # Noiseless setting + if self.is_it_noiseless: + # Defining the objective function + p += pl.lpSum([self.lambda_w * w[i] if isinstance(self.lambda_w, (int, float)) else self.lambda_w[i] * w[i] + for i in range(n)]) + # Constraints + for i in positive_label: + p += pl.lpSum([A[i][j] * w[j] for j in range(n)]) >= 1 + for i in negative_label: + p += pl.lpSum([A[i][j] * w[j] for j in range(n)]) == 0 + # Prevalence lower-bound + if self.defective_num_lower_bound is not None: + p += pl.lpSum([w[k] for k in range(n)]) >= self.defective_num_lower_bound + + # -------------------------------------- + # Noisy setting + else: + ep = [] + en = [] + # Variable ep + if len(positive_label) != 0: + ep = LpVariable.dicts(name='ep', indexs=list(positive_label), lowBound=0, upBound=1, cat=self.ep_cat) + # Variable en + if len(negative_label) != 0: + en = LpVariable.dicts(name='en', indexs=list(negative_label), lowBound=0, upBound=self.en_upBound, + cat=self.en_cat) + # Defining the objective function + p += pl.lpSum([self.lambda_w * w[i] if isinstance(self.lambda_w, (int, float)) else self.lambda_w[i] * w[i] + for i in range(n)]) + \ + pl.lpSum([self.lambda_p * ep[j] for j in positive_label]) + \ + pl.lpSum([self.lambda_n * en[k] for k in negative_label]) + # Constraints + for i in positive_label: + p += pl.lpSum([A[i][j] * w[j] for j in range(n)] + ep[i]) >= 1 + for i in negative_label: + if self.en_cat == 'Continuous': + p += pl.lpSum([A[i][j] * w[j] for j in range(n)] + -1 * en[i]) == 0 + else: + p += pl.lpSum([-1 * A[i][j] * w[j] for j in range(n)] + alpha[i] * en[i]) >= 0 + # Prevalence lower-bound + if self.defective_num_lower_bound is not None: + p += pl.lpSum([w[i] for i in 
range(n)]) >= self.defective_num_lower_bound + solver = pl.get_solver(self.solver_name, **self.solver_options) + p.solve(solver) + if not self.lp_relaxation: + p.roundSolution() + # ---------------- + self.prob_ = p + print("Status:", pl.LpStatus[p.status]) + return self + + def get_params_w(self, deep=True): + variable_type = 'w' + try: + assert self.prob_ is not None + # w_solution_dict = dict([(v.name, v.varValue) + # for v in self.prob_.variables() if variable_type in v.name and v.varValue > 0]) + # TODO: Pulp uses ASCII sort when we recover the solution. It would cause a lot of problems when we want + # TODO: to use the solution. We need to use alphabetical sort based on variables names (v.names). To do so + # TODO: we use utils.py and the following lines of codes + w_solution_dict = dict([(v.name, v.varValue) + for v in self.prob_.variables() if variable_type in v.name]) + index_map = {v: i for i, v in enumerate(sorted(w_solution_dict.keys(), key=utils.natural_keys))} + w_solution_dict = {k: v for k, v in sorted(w_solution_dict.items(), key=lambda pair: index_map[pair[0]])} + except AttributeError: + raise RuntimeError("You must fit the data first!") + return w_solution_dict + + def solution(self): + try: + assert self.prob_ is not None + # w_solution = [v.name[2:] for v in self.prob_.variables() if v.name[0] == 'w' and v.varValue > 0] + # TODO: Pulp uses ASCII sort when we recover the solution. It would cause a lot of problems when we want + # TODO: to use the solution. We need to use alphabetical sort based on variables names (v.names). To do so + # TODO: we use utils.py and the following lines of codes + w_solution = self.get_params_w() + index_map = {v: i for i, v in enumerate(sorted(w_solution.keys(), key=utils.natural_keys))} + w_solution = [v for k, v in sorted(w_solution.items(), key=lambda pair: index_map[pair[0]])] + if self.lp_relaxation: + w_solution = [1 if i > self.lp_rounding_threshold else 0 for i in w_solution] + except AttributeError: + raise RuntimeError("You must fit the data first!") + return w_solution + + def predict(self, X): + return np.minimum(np.matmul(X, self.solution()), 1) + + # def score(self): + # pass + + def decodingScore(self, w_true): + return balanced_accuracy_score(w_true, self.solution()) + + def write(self): + pass + + +if __name__ == '__main__': + # options for plotting, verbose output, saving, seed + opts = {} + opts['m'] = 150 + opts['N'] = 300 + opts['verbose'] = True # False + opts['plotting'] = True # False + opts['saving'] = True + opts['run_ID'] = 'GT_test_result_vector_generation_component' + opts['data_filename'] = opts['run_ID'] + '_generate_groups_output.mat' + opts['test_noise_methods'] = [] + opts['seed'] = 0 + + A = np.random.randint(2, size=(opts['m'], opts['N'])) + u = np.random.randint(2, size=opts['N']) + b = gen_test_vector(A, u, opts) + # print(A) + # print(b) + # print(u) + + # Test + # A = np.array([[1,0,0],[1,0,1],[0,1,0]]) + # b = np.array([1,0,1]) + current_directory = os.getcwd() + file_path = os.path.join(current_directory, r'problem.mps') + + param = {} + # param['file_path'] = file_path + param['lambda_w'] = 1 + param['lambda_p'] = 100 + param['lambda_n'] = 100 + # param['verbose'] = False + param['defective_num_lower_bound'] = None + param['sensitivity_threshold'] = None + param['specificity_threshold'] = None + param['is_it_noiseless'] = True + param['lp_relaxation'] = True + param['solver_name'] = 'COIN_CMD' + param['solver_options'] = {'timeLimit': 60, 'logPath': 'log.txt'} + + c = GroupTestingDecoder(**param) 
+ c.fit(A, b) + print(c.decodingScore(b)) + diff --git a/build/lib/group_testing/group_testing_evaluation.py b/build/lib/group_testing/group_testing_evaluation.py new file mode 100644 index 0000000..9707e24 --- /dev/null +++ b/build/lib/group_testing/group_testing_evaluation.py @@ -0,0 +1,22 @@ +import sklearn +from sklearn.metrics import confusion_matrix +import os + + +def decoder_evaluation(w_true, sln, ev_metric='balanced_accuracy'): + tn, fp, fn, tp = confusion_matrix(w_true, sln).ravel() + eval_metric = getattr(sklearn.metrics,'{}_score'.format(ev_metric)) + eval_score = eval_metric(w_true, sln) + ev_result = {'tn': tn, 'fp': fp, 'fn': fn, 'tp':tp, ev_metric:round(eval_score, 3)} + print(ev_result) + return ev_result + + +if __name__ == '__main__': + # TODO: update this part + current_directory = os.getcwd() + file_path = os.path.join(current_directory, r'problem.mps') + u = [1, 0, 0, 1, 0, 1] + sln = {'w': [0, 3]} + n = 6 + decoder_evaluation(u, sln, n) diff --git a/build/lib/group_testing/utils.py b/build/lib/group_testing/utils.py new file mode 100644 index 0000000..89c5f81 --- /dev/null +++ b/build/lib/group_testing/utils.py @@ -0,0 +1,219 @@ +''' +The following code was copied from a stackoverflow post: +https://stackoverflow.com/questions/5967500/how-to-correctly-sort-a-string-with-a-number-inside +''' +import re +import numpy as np +import itertools +import os +import datetime +import yaml +import sys +from shutil import copyfile +import inspect +import math +import warnings + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def config_decoder(config_inpt): + ''' + TODO: I used the following stackoverflow post for the return part: + https://stackoverflow.com/questions/5228158/cartesian-product-of-a-dictionary-of-lists + ''' + for keys, vals in config_inpt.items(): + if isinstance(vals, dict): + # print(vals) + if 'mode' in config_inpt[keys].keys(): + if config_inpt[keys]['mode'] == 'range': + config_inpt[keys] = np.arange(*config_inpt[keys]['values']) + elif config_inpt[keys]['mode'] == 'list': + config_inpt[keys] = config_inpt[keys]['values'] + else: + config_inpt[keys] = [config_inpt[keys]] + else: + config_inpt[keys] = [vals] + return [dict(zip(config_inpt.keys(), vals)) for vals in itertools.product(*config_inpt.values())] + + +def config_input_or_params(current_dict, block_name, generate_label): + if 'input' in current_dict.keys(): + current_setting = {'{}_input'.format(block_name): current_dict['input']} + current_setting[generate_label] = 'input' + else: + current_setting = current_dict['params'] + current_setting[generate_label] = 'generate' + if 'alternative_module' in current_dict.keys(): + current_setting[generate_label] = 'alternative_module' + current_setting['{}_alternative_module'.format(block_name)] = current_dict['alternative_module'] + return current_setting + + +def config_reader(config_file_name): + try: + # Read config file + with open(config_file_name, 'r') as config_file: + config_dict = yaml.load(config_file, Loader=yaml.FullLoader) + design_param = {'generate_groups': False} + decoder_param = {'decoding': False, 'decoder': False, 'lambda_selection': False, 'evaluation': False} + # Load params + if 'design' in config_dict.keys(): + assert 'groups' in config_dict['design'].keys(), \ + 
"You should define the 'groups' block in the config file!" + generate_groups = config_input_or_params(config_dict['design']['groups'], 'groups', 'generate_groups') + design_param.update(generate_groups) + try: + generate_individual_status = config_input_or_params( + current_dict=config_dict['design']['individual_status'], + block_name='individual_status', + generate_label='generate_individual_status') + generate_individual_status_keys = list(generate_individual_status.keys()) + for k in generate_individual_status_keys: + if k in design_param.keys(): + print( + '{} has been set before in the "group" block! The framework would continue with the initial' + ' value!'.format(k)) + generate_individual_status.pop(k) + design_param.update(generate_individual_status) + except KeyError: + print( + "Warning: 'individual_status' block is not found in the config file! Individual status is necessary" + " if the results need to be evaluated!") + design_param['generate_individual_status'] = False + try: + generate_test_results = config_input_or_params(config_dict['design']['test_results'], 'test_results', + 'generate_test_results') + generate_test_results_keys = list(generate_test_results.keys()) + for k in generate_test_results_keys: + if k in design_param.keys(): + print('{} has been set before in the "group" or "individual_status" block! The framework ' + 'would continue with the initial value!'.format(k)) + generate_test_results.pop(k) + design_param.update(generate_test_results) + design_param['test_results'] = True + except KeyError: + print("Warning: 'test_results' block is not found in the config file! Test results is necessary for" + " decoding!") + design_param['generate_test_results'] = False + design_param['test_results'] = False + if 'decode' in config_dict.keys(): + assert design_param['test_results'], "It is not possible to decode without test results! Please define the " \ + "'test_results' block in the config file." 
+ decoder_param['decoding'] = True + if 'decoder' in config_dict['decode'].keys(): + try: + decode_param = config_input_or_params(config_dict['decode']['decoder'], 'decoder', 'decoder') + # decoder_param['decoder'] = True + decoder_param.update(decode_param) + except: + print("decoder format in the config file is not correct!") + e = sys.exc_info()[0] + print("Error:", e) + if 'evaluation' in config_dict['decode'].keys(): + try: + evaluation_param = config_dict['decode']['evaluation'] + decoder_param['evaluation'] = True + decoder_param.update(evaluation_param) + except: + print("evaluation format in the config file is not correct!") + e = sys.exc_info()[0] + print("Error:", e) + + return config_decoder(design_param), config_decoder(decoder_param) + except FileNotFoundError: + sys.exit("Config file '{}' cannot be found!".format(config_file_name)) + except: + e = sys.exc_info()[0] + sys.exit(e) + + +def path_generator(file_path, file_name, file_format, dir_name=None): + currentDate = datetime.datetime.now() + # TODO: change dir_name to unique code if you want to run multiple configs + if dir_name is None: + dir_name = currentDate.strftime("%b_%d_%Y_%H_%M") + local_path = "./Results/{}".format(dir_name) + path = os.getcwd() + if not os.path.isdir(local_path): + try: + os.makedirs(path + local_path[1:]) + except OSError: + print("Creation of the directory %s failed" % path + local_path[1:]) + else: + print("Successfully created the directory %s" % path + local_path[1:]) + return path + local_path[1:] + "/{}.{}".format(file_name, file_format) + + +def report_file_path(report_path, report_label, report_extension, params): + report_path = report_path + '/{}_N{}_g{}_m{}_s{}_seed{}.{}'.format(report_label, params['N'], params['group_size'], + params['m'], params['s'], params['seed'], + report_extension) + return report_path + + +def result_path_generator(args): + current_path = os.getcwd() + currentDate = datetime.datetime.now() + if args.output_path is None: + dir_name = currentDate.strftime("%b_%d_%Y_%H_%M_%S") + else: + dir_name = args.output_path + result_path = os.path.join(current_path, "Results/{}".format(dir_name)) + if not os.path.isdir(result_path): + try: + os.makedirs(result_path) + except OSError: + print("Creation of the directory %s failed" % result_path) + else: + print("Successfully created the directory %s " % result_path) + # Copy config file + copyfile(args.config, os.path.join(result_path, 'config.yml')) + return current_path, result_path + + +def inner_path_generator(current_path, inner_dir): + inner_path = os.path.join(current_path, inner_dir) + if not os.path.isdir(inner_path): + try: + os.makedirs(inner_path) + except OSError: + print("Creation of the directory %s failed" % inner_path) + else: + print("Successfully created the directory %s " % inner_path) + return inner_path + + +def param_distributor(param_dictionary, function_name): + passing_param = {k: param_dictionary[k] for k in inspect.signature(function_name).parameters if + k in param_dictionary} + remaining_param = {k: inspect.signature(function_name).parameters[k].default if + inspect.signature(function_name).parameters[k].default != inspect._empty else None for k in + inspect.signature(function_name).parameters if k not in passing_param} + return passing_param, remaining_param + + +def auto_group_size(N, s): + group_size = round(math.log(0.5) / math.log(1 - (int(s) / int(N)))) + if group_size > 32: + print('The optimal group size is {}, However the maximum group size cannot be greater than 32!' 
+ ' The group size is set to 32'.format(group_size)) + group_size = 32 + return group_size + + +if __name__ == '__main__': + design_param, decoder_param = config_reader('config.yml') + print(design_param, decoder_param) + print(len(design_param), len(decoder_param)) diff --git a/dist/GroupTesting-0.1.0.macosx-10.9-x86_64.tar.gz b/dist/GroupTesting-0.1.0.macosx-10.9-x86_64.tar.gz new file mode 100644 index 0000000..92597b7 Binary files /dev/null and b/dist/GroupTesting-0.1.0.macosx-10.9-x86_64.tar.gz differ diff --git a/group_testing/config.yml b/group_testing/config.yml index 703f387..537ceec 100644 --- a/group_testing/config.yml +++ b/group_testing/config.yml @@ -6,7 +6,7 @@ design: params: seed: mode: 'range' - values: [0,1,1] + values: [0,2,1] N: 1000 'm': 500 'verbose': False diff --git a/group_testing/generate_groups.py b/group_testing/generate_groups.py index 44084cf..e699fc4 100644 --- a/group_testing/generate_groups.py +++ b/group_testing/generate_groups.py @@ -44,7 +44,7 @@ def gen_measurement_matrix(seed=0, N=1000, m=100, group_size=4, max_tests_per_in graph_gen_method='no_multiple', plotting=False, saving=True, run_ID='debugging'): # set the seed used for graph generation to the options seed random.seed(seed) - np.random.seed(seed) + np_random = np.random.RandomState(seed) # compute tests per individual needed to satisfy # N*tests_per_individual == m*group_size avg_test_split = math.floor(m * group_size / N) @@ -103,7 +103,7 @@ def gen_measurement_matrix(seed=0, N=1000, m=100, group_size=4, max_tests_per_in while (np.sum(indeg) > np.sum(outdeg)): nz_indeg = indeg[(indeg > 0)] max_indices = np.array(np.where(indeg == nz_indeg.max())) - index = np.random.randint(max_indices.shape[1], size=1) + index = np_random.randint(max_indices.shape[1], size=1) indeg[max_indices[0, index]] = indeg[max_indices[0, index]] - 1 # check if the number of tests per individual is less than the max, and, @@ -122,7 +122,7 @@ def gen_measurement_matrix(seed=0, N=1000, m=100, group_size=4, max_tests_per_in # min_indices = np.array(np.where((indeg == indeg.min()) & (indeg > 0))) # print(min_indices) # print(min_indices.shape[1]) - index = np.random.randint(min_indices.shape[1], size=1) + index = np_random.randint(min_indices.shape[1], size=1) # print('generated index = ' + str(index)) indeg[min_indices[0, index]] = indeg[min_indices[0, index]] + 1 diff --git a/group_testing/generate_individual_status.py b/group_testing/generate_individual_status.py index 428ce60..4f8827d 100644 --- a/group_testing/generate_individual_status.py +++ b/group_testing/generate_individual_status.py @@ -21,11 +21,12 @@ def gen_status_vector(seed=0, N=1000, s=10, verbose=False): # set the seed used for status generation - random.seed(seed) + local_random = random.Random() + local_random.seed(seed) np.random.seed(seed) # generate a random vector having sparsity level s - indices = random.sample(range(N), s) + indices = local_random.sample(range(N), s) u = np.zeros((N, 1)) for i in indices: u[i] = 1 diff --git a/group_testing/generate_test_results.py b/group_testing/generate_test_results.py index 8258730..852ea68 100644 --- a/group_testing/generate_test_results.py +++ b/group_testing/generate_test_results.py @@ -30,7 +30,7 @@ def gen_test_vector(A, u, seed=0, m=100, verbose=False, test_noise_methods=[], b theta_l=0, theta_u=0.0625, permutation_noise_prob=0.01): # set the seed used for test result noise random.seed(seed) - np.random.seed(seed) + np_random = np.random.RandomState(seed) # generate the tests directly from A and u b = 
np.matmul(A, u) @@ -62,7 +62,7 @@ def gen_test_vector(A, u, seed=0, m=100, verbose=False, test_noise_methods=[], b num_flip = math.ceil(m * rho) - vec = np.random.choice(indices, size=num_flip, replace=False, p=weight_vec) + vec = np_random.choice(indices, size=num_flip, replace=False, p=weight_vec) # copy b into a new vector for adding noise b_noisy = np.array(b) @@ -117,7 +117,7 @@ def gen_test_vector(A, u, seed=0, m=100, verbose=False, test_noise_methods=[], b # b_noisy[i] = np.random.randint(2) # instead use probability of false negatives = 1/10 - b_noisy[i] = np.random.choice(np.arange(2), p=[0.1, 0.9]) + b_noisy[i] = np_random.choice(np.arange(2), p=[0.1, 0.9]) if verbose: print('after threshold noise - left: b, right: b_noisy') @@ -147,7 +147,7 @@ def gen_test_vector(A, u, seed=0, m=100, verbose=False, test_noise_methods=[], b print('number of permuted items exceeds m, incorrect range for rho?') sys.exit() else: - vec = np.random.choice(indices, size=2 * num_permute, replace=False, p=weight_vec) + vec = np_random.choice(indices, size=2 * num_permute, replace=False, p=weight_vec) # copy b into a new vector for adding noise b_noisy = np.array(b) diff --git a/group_testing/group_testing.py b/group_testing/group_testing.py index b46489e..c4a3559 100644 --- a/group_testing/group_testing.py +++ b/group_testing/group_testing.py @@ -99,7 +99,6 @@ def multi_process_group_testing(design_param, decoder_param): b = gen_test_vector(A, u,**passing_param) for main_param in ['N', 'm', 's', 'group_size', 'seed']: if main_param not in design_param: - #assert main_param in remaining_param, "{} is not defined in the config file!".format(main_param) if main_param not in remaining_param: design_param[main_param]= 'N\A' else: @@ -117,15 +116,17 @@ def multi_process_group_testing(design_param, decoder_param): test_results_path = utils.inner_path_generator(design_path,'Test_Results') pd.DataFrame(b).to_csv(utils.report_file_path(test_results_path, 'test_results','csv', design_param), header=None, index=None) - except Exception as design_error: - print(design_error) - decoder_param['decoding']=False + except: + e = sys.exc_info() + print("Error:", e) + decoder_param['decoding'] = False if decoder_param['decoding']: try: if decoder_param['decoder'] == 'generate': # TODO: this is only for cplex! Change it to more general form! 
- decoder_param['solver_options']['logPath'] = utils.report_file_path(design_param['log_path'], 'log','txt', design_param) + log_path = utils.inner_path_generator(design_param['result_path'], 'Logs') + decoder_param['solver_options']['logPath'] = utils.report_file_path(log_path, 'log','txt', design_param) passing_param,_ = utils.param_distributor(decoder_param,GroupTestingDecoder) c = GroupTestingDecoder(**passing_param) single_fit_start = time.time() @@ -141,11 +142,11 @@ def multi_process_group_testing(design_param, decoder_param): c = grid.best_estimator_ pd.DataFrame.from_dict(grid.cv_results_).to_csv( - utils.report_file_path(design_param['log_path'], + utils.report_file_path(log_path, 'cv_results', 'csv', design_param) ) pd.DataFrame(grid.best_params_, index=[0]).to_csv( - utils.report_file_path(design_param['log_path'], + utils.report_file_path(log_path, 'best_param', 'csv', design_param) ) else: @@ -215,9 +216,10 @@ def main(sysargs=sys.argv[1:]): # Read config file design_param, decoder_param = utils.config_reader(args.config) # output files path - current_path, design_param[0]['result_path'] = utils.result_path_generator(args) - design_param[0]['log_path'] = utils.inner_path_generator(design_param[0]['result_path'], 'Logs') - if not decoder_param[0]['lambda_selection']: + current_path, result_path = utils.result_path_generator(args) + for d in design_param: + d['result_path'] = result_path + if not any([d['lambda_selection'] if 'lambda_selection' in d.keys() else False for d in decoder_param]): with Pool(processes=cpu_count()) as pool: results = pool.starmap(multi_process_group_testing, itertools.product(design_param, decoder_param)) pool.close() @@ -226,12 +228,11 @@ def main(sysargs=sys.argv[1:]): results = [multi_process_group_testing(i[0], i[1]) for i in itertools.product(design_param, decoder_param)] # Saving files - pd.DataFrame(design_param).to_csv(os.path.join(design_param[0]['result_path'], 'opts.csv')) - #print("---------------------->", results) + pd.DataFrame(design_param).to_csv(os.path.join(result_path, 'opts.csv')) if all(v is not None for v in results): column_names = ['N', 'm', 's', 'group_size', 'seed', 'max_tests_per_individual', 'tn', 'fp', 'fn', 'tp', decoder_param[0]['eval_metric'], 'Status', 'solver_time', 'time'] - pd.DataFrame(results).reindex(columns=column_names).to_csv(os.path.join(design_param[0]['result_path'], 'ConfusionMatrix.csv')) + pd.DataFrame(results).reindex(columns=column_names).to_csv(os.path.join(result_path, 'ConfusionMatrix.csv')) end_time = time.time() print(end_time - start_time) \ No newline at end of file diff --git a/group_testing/utils.py b/group_testing/utils.py index 6313121..89c5f81 100644 --- a/group_testing/utils.py +++ b/group_testing/utils.py @@ -66,75 +66,77 @@ def config_reader(config_file_name): # Read config file with open(config_file_name, 'r') as config_file: config_dict = yaml.load(config_file, Loader=yaml.FullLoader) - except: - - e = sys.exc_info()[0] - print("config file can not be found!") - print("Error:", e) - design_param = {'generate_groups': False} - decoder_param = {'decoding': False, 'decoder': False, 'lambda_selection': False, 'evaluation': False} - # Load params - if 'design' in config_dict.keys(): - assert 'groups' in config_dict['design'].keys(), \ - "You should define the 'groups' block in the config file!" 
- generate_groups = config_input_or_params(config_dict['design']['groups'], 'groups', 'generate_groups') - design_param.update(generate_groups) - try: - generate_individual_status = config_input_or_params( - current_dict=config_dict['design']['individual_status'], - block_name='individual_status', - generate_label='generate_individual_status') - generate_individual_status_keys = list(generate_individual_status.keys()) - for k in generate_individual_status_keys: - if k in design_param.keys(): - print('{} has been set before in the "group" block! The framework would continue with the initial' - ' value!'.format(k)) - generate_individual_status.pop(k) - design_param.update(generate_individual_status) - except KeyError: - print("Warning: 'individual_status' block is not found in the config file! Individual status is necessary" - " if the results need to be evaluated!") - design_param['generate_individual_status'] = False - try: - generate_test_results = config_input_or_params(config_dict['design']['test_results'], 'test_results', - 'generate_test_results') - generate_test_results_keys = list(generate_test_results.keys()) - for k in generate_test_results_keys: - if k in design_param.keys(): - print('{} has been set before in the "group" or "individual_status" block! The framework ' - 'would continue with the initial value!'.format(k)) - generate_test_results.pop(k) - design_param.update(generate_test_results) - design_param['test_results'] = True - except KeyError: - print("Warning: 'test_results' block is not found in the config file! Test results is necessary for" - " decoding!") - design_param['generate_test_results'] = False - design_param['test_results'] = False - if 'decode' in config_dict.keys(): - assert design_param['test_results'], "It is not possible to decode without test results! Please define the " \ - "'test_results' block in the config file." - decoder_param['decoding'] = True - if 'decoder' in config_dict['decode'].keys(): + design_param = {'generate_groups': False} + decoder_param = {'decoding': False, 'decoder': False, 'lambda_selection': False, 'evaluation': False} + # Load params + if 'design' in config_dict.keys(): + assert 'groups' in config_dict['design'].keys(), \ + "You should define the 'groups' block in the config file!" + generate_groups = config_input_or_params(config_dict['design']['groups'], 'groups', 'generate_groups') + design_param.update(generate_groups) try: - decode_param = config_input_or_params(config_dict['decode']['decoder'], 'decoder', 'decoder') - # decoder_param['decoder'] = True - decoder_param.update(decode_param) - except: - print("decoder format in the config file is not correct!") - e = sys.exc_info()[0] - print("Error:", e) - if 'evaluation' in config_dict['decode'].keys(): + generate_individual_status = config_input_or_params( + current_dict=config_dict['design']['individual_status'], + block_name='individual_status', + generate_label='generate_individual_status') + generate_individual_status_keys = list(generate_individual_status.keys()) + for k in generate_individual_status_keys: + if k in design_param.keys(): + print( + '{} has been set before in the "group" block! The framework would continue with the initial' + ' value!'.format(k)) + generate_individual_status.pop(k) + design_param.update(generate_individual_status) + except KeyError: + print( + "Warning: 'individual_status' block is not found in the config file! 
Individual status is necessary" + " if the results need to be evaluated!") + design_param['generate_individual_status'] = False try: - evaluation_param = config_dict['decode']['evaluation'] - decoder_param['evaluation'] = True - decoder_param.update(evaluation_param) - except: - print("evaluation format in the config file is not correct!") - e = sys.exc_info()[0] - print("Error:", e) - - return config_decoder(design_param), config_decoder(decoder_param) + generate_test_results = config_input_or_params(config_dict['design']['test_results'], 'test_results', + 'generate_test_results') + generate_test_results_keys = list(generate_test_results.keys()) + for k in generate_test_results_keys: + if k in design_param.keys(): + print('{} has been set before in the "group" or "individual_status" block! The framework ' + 'would continue with the initial value!'.format(k)) + generate_test_results.pop(k) + design_param.update(generate_test_results) + design_param['test_results'] = True + except KeyError: + print("Warning: 'test_results' block is not found in the config file! Test results is necessary for" + " decoding!") + design_param['generate_test_results'] = False + design_param['test_results'] = False + if 'decode' in config_dict.keys(): + assert design_param['test_results'], "It is not possible to decode without test results! Please define the " \ + "'test_results' block in the config file." + decoder_param['decoding'] = True + if 'decoder' in config_dict['decode'].keys(): + try: + decode_param = config_input_or_params(config_dict['decode']['decoder'], 'decoder', 'decoder') + # decoder_param['decoder'] = True + decoder_param.update(decode_param) + except: + print("decoder format in the config file is not correct!") + e = sys.exc_info()[0] + print("Error:", e) + if 'evaluation' in config_dict['decode'].keys(): + try: + evaluation_param = config_dict['decode']['evaluation'] + decoder_param['evaluation'] = True + decoder_param.update(evaluation_param) + except: + print("evaluation format in the config file is not correct!") + e = sys.exc_info()[0] + print("Error:", e) + + return config_decoder(design_param), config_decoder(decoder_param) + except FileNotFoundError: + sys.exit("Config file '{}' cannot be found!".format(config_file_name)) + except: + e = sys.exc_info()[0] + sys.exit(e) def path_generator(file_path, file_name, file_format, dir_name=None): @@ -154,18 +156,20 @@ def path_generator(file_path, file_name, file_format, dir_name=None): return path + local_path[1:] + "/{}.{}".format(file_name, file_format) -def report_file_path(report_path, report_label,report_extension, params): +def report_file_path(report_path, report_label, report_extension, params): report_path = report_path + '/{}_N{}_g{}_m{}_s{}_seed{}.{}'.format(report_label, params['N'], params['group_size'], - params['m'], params['s'], params['seed'], + params['m'], params['s'], params['seed'], report_extension) return report_path -def result_path_generator(args, dir_name=None): +def result_path_generator(args): current_path = os.getcwd() currentDate = datetime.datetime.now() - if dir_name is None: + if args.output_path is None: dir_name = currentDate.strftime("%b_%d_%Y_%H_%M_%S") + else: + dir_name = args.output_path result_path = os.path.join(current_path, "Results/{}".format(dir_name)) if not os.path.isdir(result_path): try: @@ -192,19 +196,23 @@ def inner_path_generator(current_path, inner_dir): def param_distributor(param_dictionary, function_name): - passing_param = {k: param_dictionary[k] for k in 
inspect.signature(function_name).parameters if k in param_dictionary}
+    passing_param = {k: param_dictionary[k] for k in inspect.signature(function_name).parameters if
+                     k in param_dictionary}
     remaining_param = {k: inspect.signature(function_name).parameters[k].default if
-                       inspect.signature(function_name).parameters[k].default!= inspect._empty else None for k in
+                       inspect.signature(function_name).parameters[k].default != inspect._empty else None for k in
                        inspect.signature(function_name).parameters if k not in passing_param}
     return passing_param, remaining_param
 
-
-def auto_group_size(N,s):
-    group_size = round(math.log(0.5)/math.log(1-(int(s)/int(N))))
+
+def auto_group_size(N, s):
+    group_size = round(math.log(0.5) / math.log(1 - (int(s) / int(N))))
     if group_size > 32:
-        group_size=32
+        print('The optimal group size is {}, However the maximum group size cannot be greater than 32!'
+              ' The group size is set to 32'.format(group_size))
+        group_size = 32
     return group_size
 
+
 if __name__ == '__main__':
     design_param, decoder_param = config_reader('config.yml')
     print(design_param, decoder_param)
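
A note on the auto_group_size change above: the formula computes the group size g that solves (1 - s/N)^g = 0.5, i.e. the size at which a group drawn from a population with prevalence s/N tests negative about half the time (assuming independent infection statuses), and the patch now caps the result at 32 with a warning instead of silently clamping. The sketch below is illustrative only; it is not part of the patch and the helper name is made up, but it reproduces the same arithmetic:

import math

def illustrative_group_size(N, s, max_size=32):
    # Solve (1 - s/N)**g = 0.5 for g: a group of size g drawn from a
    # population with prevalence s/N tests negative about half the time.
    g = round(math.log(0.5) / math.log(1 - s / N))
    # Cap the group size, mirroring the limit enforced by auto_group_size.
    return min(g, max_size)

# N = 1000 individuals, s = 10 infected (1% prevalence):
# log(0.5) / log(0.99) ~= 68.97, rounded to 69, then capped to 32.
print(illustrative_group_size(1000, 10))   # -> 32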
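
The param_distributor helper touched in the hunk above splits a parameter dictionary against a callable's signature: keys the callable accepts are passed through, and accepted-but-missing keys fall back to the signature defaults (or None when the parameter has no default; the patch compares against inspect._empty, the private alias of inspect.Parameter.empty). A minimal standalone sketch of that pattern, using an invented target function for illustration (the real callers are functions and classes such as GroupTestingDecoder):

import inspect

def example_target(N, m=100, verbose=False):
    return N, m, verbose

config = {'N': 1000, 'verbose': True, 'unrelated_key': 42}
sig = inspect.signature(example_target).parameters

# Keys the target accepts and the config provides.
passing = {k: config[k] for k in sig if k in config}
# Accepted keys the config omits, mapped to their defaults (None if required).
remaining = {k: sig[k].default if sig[k].default is not inspect.Parameter.empty else None
             for k in sig if k not in passing}

print(passing)    # {'N': 1000, 'verbose': True}
print(remaining)  # {'m': 100}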
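
The gen_test_vector hunks near the top of this section replace module-level np.random.choice calls with np_random.choice, where np_random is the np.random.RandomState(seed) instance created from the function's seed argument, so the injected test noise is reproducible and does not disturb the global random state. A minimal sketch of the same pattern on a toy test vector; the function and variable names here are illustrative and not the project's API:

import math
import numpy as np

def flip_fraction(b, rho, seed=0):
    # One seeded generator for all draws keeps the injected noise reproducible.
    np_random = np.random.RandomState(seed)
    m = len(b)
    num_flip = math.ceil(m * rho)
    # Pick which test results to flip, without replacement.
    idx = np_random.choice(np.arange(m), size=num_flip, replace=False)
    b_noisy = np.array(b)
    b_noisy[idx] = 1 - b_noisy[idx]
    return b_noisy

b = np.array([0, 1, 0, 1, 1, 0, 0, 1])
print(flip_fraction(b, rho=0.25, seed=0))  # identical output on every run with seed=0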