change random seed to RandomState for numpy

WGS-TB · Apr 27, 2021 · 9e821db · 9e821db
1 parent a96e274
commit 9e821db
Show file tree

Hide file tree

Showing 21 changed files with 1,410 additions and 100 deletions.
diff --git a/GroupTesting.egg-info/PKG-INFO b/GroupTesting.egg-info/PKG-INFO
@@ -0,0 +1,10 @@
+Metadata-Version: 1.0
+Name: GroupTesting
+Version: 0.1.0
+Summary: Group testing for SARS-CoV-2 in large populations.
+Home-page: https://github.com/WGS-TB/GroupTesting
+Author: Hooman Zabeti, Nick Dexter
+Author-email: UNKNOWN
+License: TBD
+Description: UNKNOWN
+Platform: UNKNOWN
diff --git a/GroupTesting.egg-info/SOURCES.txt b/GroupTesting.egg-info/SOURCES.txt
@@ -0,0 +1,17 @@
+MANIFEST.in
+README.md
+setup.py
+GroupTesting.egg-info/PKG-INFO
+GroupTesting.egg-info/SOURCES.txt
+GroupTesting.egg-info/dependency_links.txt
+GroupTesting.egg-info/entry_points.txt
+GroupTesting.egg-info/requires.txt
+GroupTesting.egg-info/top_level.txt
+group_testing/__init__.py
+group_testing/generate_groups.py
+group_testing/generate_individual_status.py
+group_testing/generate_test_results.py
+group_testing/group_testing.py
+group_testing/group_testing_decoder.py
+group_testing/group_testing_evaluation.py
+group_testing/utils.py
diff --git a/GroupTesting.egg-info/dependency_links.txt b/GroupTesting.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/GroupTesting.egg-info/entry_points.txt b/GroupTesting.egg-info/entry_points.txt
@@ -0,0 +1,4 @@
+
+    [console_scripts]
+    GroupTesting = group_testing.group_testing:main
+
diff --git a/GroupTesting.egg-info/requires.txt b/GroupTesting.egg-info/requires.txt
@@ -0,0 +1,9 @@
+cplex
+numpy
+pandas
+pulp
+pymprog
+python-igraph
+pyyaml
+scipy
+sklearn
diff --git a/GroupTesting.egg-info/top_level.txt b/GroupTesting.egg-info/top_level.txt
@@ -0,0 +1 @@
+group_testing
diff --git a/build/lib/group_testing/__init__.py b/build/lib/group_testing/__init__.py
@@ -0,0 +1,2 @@
+_program = "GroupTesting"
+__version__ = "0.1.0"
diff --git a/build/lib/group_testing/generate_groups.py b/build/lib/group_testing/generate_groups.py
@@ -0,0 +1,261 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Jun 25 13:45:50 2020
+
+@author: ndexter
+"""
+
+# import standard libraries
+import sys, math
+from os import path
+import random
+import numpy as np
+
+# np.set_printoptions(threshold=np.inf)
+np.set_printoptions(edgeitems=60, linewidth=100000,
+                    formatter=dict(float=lambda x: "%.3g" % x))
+import igraph
+import scipy.io as sio
+
+# import hdf5storage
+# import matplotlib
+# atplotlib.use('Agg')
+# import unittest
+
+# import plotting and data-manipulation tools
+# import matplotlib.pyplot as plt
+
+"""
+Unit testing to be added later
+"""
+# class TestMeasurementMatrix(unittest.TestCase):
+#
+#    def test_make_graph(self):
+#        gen_measurement_matrix(opts)
+
+
+"""
+Function to generate and return the matrix
+"""
+
+
+def gen_measurement_matrix(seed=0, N=1000, m=100, group_size=4, max_tests_per_individual=4, verbose=False,
+                           graph_gen_method='no_multiple', plotting=False, saving=True, run_ID='debugging'):
+    # set the seed used for graph generation to the options seed
+    random.seed(seed)
+    np_random = np.random.RandomState(seed)
+    # compute tests per individual needed to satisfy
+    #           N*tests_per_individual == m*group_size
+    avg_test_split = math.floor(m * group_size / N)
+    if verbose:
+        print('number of tests per individual needed to satisfy ' \
+              + '(N*tests_per_individual == m*group_size) = ' + str(avg_test_split))
+
+    # verify the given parameters can lead to a valid testing scheme:
+    #   if floor(m*group_size/N) > max_tests_per_individual, we would need to 
+    #   test each individual more times than the maximum allowed to satisfy 
+    #   the group size constraint
+    try:
+        # ensure that we're not violating the maximum
+        assert avg_test_split <= max_tests_per_individual
+    except AssertionError:
+        errstr = ('Assertion Failed: With group_size = ' + str(group_size) + ', since m = ' + str(m) +
+                  ' and N = ' + str(N) + ' we have floor(m*group_size/N) = ' + str(avg_test_split) +
+                  'which exceeds max_tests_per_individual = ' + str(max_tests_per_individual))
+        print(errstr)
+        sys.exit()
+
+    # compute the actual tests per individual
+    # note: we may end up with individuals being tested less than the maximum
+    #   allowed number of tests, but this is not a problem (only the opposite is)
+    tests_per_individual = max(min(max_tests_per_individual, avg_test_split), 1)
+
+    if verbose:
+        print("tests_per_individual = " + str(tests_per_individual))
+
+    # In this model, the first N elements of the degree sequence correspond
+    # to the population, while the last m elements correspond to the groups
+
+    # in-degree corresponds to the individuals, here we set the first N 
+    # entries of the in-degree sequence to specify the individuals 
+    indeg = np.zeros(N + m)
+    indeg[0:N] = tests_per_individual
+
+    # out-degree corresponds to the group tests, here we set the last m 
+    # entries of the out-degree sequence to specify the groups
+    outdeg = np.zeros(N + m)
+    outdeg[N:(N + m)] = group_size
+
+    # keep track of vertex types for final graph vertex coloring
+    vtypes = np.zeros(N + m)
+    vtypes[0:N] = 1
+
+    # output the sum of indeg and outdeg if checking conditions
+    if verbose:
+        print("out degree sequence: {}".format(outdeg.tolist()))
+        print("in degree sequence:  {}".format(indeg.tolist()))
+        print("sum outdeg (groups) = {}".format(np.sum(outdeg)))
+        print("sum indeg (individ) = {}".format(np.sum(indeg)))
+
+    # check if we have too many individuals being tested
+    # if np.sum(indeg) > np.sum(outdeg):
+    while (np.sum(indeg) > np.sum(outdeg)):
+        nz_indeg = indeg[(indeg > 0)]
+        max_indices = np.array(np.where(indeg == nz_indeg.max()))
+        index = np_random.randint(max_indices.shape[1], size=1)
+        indeg[max_indices[0, index]] = indeg[max_indices[0, index]] - 1
+
+    # check if the number of tests per individual is less than the max, and, 
+    # if we can, fix it
+    if tests_per_individual < max_tests_per_individual:
+        if (np.sum(indeg) < np.sum(outdeg)):
+            while (np.sum(indeg) < np.sum(outdeg)):
+                # select index randomly (bad: might cause largest index to be updated, 
+                #   possibly exceed max_tests_per_individual)
+                # index = np.random.randint(0,N)
+
+                # select index corresponding to one of the minimum indices
+                nz_indeg = indeg[(indeg > 0)]
+                min_indices = np.array(np.where(indeg == nz_indeg.min()))
+                # min_indices = indeg[(indeg == nz_indeg.min())]# & (indeg > 0)]
+                # min_indices = np.array(np.where((indeg == indeg.min()) & (indeg > 0)))
+                # print(min_indices)
+                # print(min_indices.shape[1])
+                index = np_random.randint(min_indices.shape[1], size=1)
+                # print('generated index = ' + str(index))
+                indeg[min_indices[0, index]] = indeg[min_indices[0, index]] + 1
+
+            # output stats after fixing
+            if verbose:
+                print("after fixing")
+                print("out degree sequence: {}".format(outdeg.tolist()))
+                print("in degree sequence:  {}".format(indeg.tolist()))
+                print("sum outdeg (groups) = {}".format(np.sum(outdeg)))
+                print("sum indeg (individ) = {}".format(np.sum(indeg)))
+
+    else:
+        try:
+            assert np.sum(outdeg) == np.sum(indeg)
+        except AssertionError:
+            errstr = ("Assertion Failed: Require sum(outdeg) = " + str(np.sum(outdeg)) + " == " \
+                      + str(np.sum(indeg)) + " = sum(indeg)")
+            print(errstr)
+            print("out degree sequence: {}".format(outdeg.tolist()))
+            print("in degree sequence: {}".format(indeg.tolist()))
+            # sys.exit()
+
+    # generate the graph
+    try:
+        assert igraph._igraph.is_graphical_degree_sequence(outdeg.tolist(), indeg.tolist())
+        g = igraph.Graph.Degree_Sequence(outdeg.tolist(), indeg.tolist(), graph_gen_method)
+        g.vs['vertex_type'] = vtypes
+        assert np.sum(outdeg) == len(g.get_edgelist())
+    except AssertionError:
+        errstr = ("Assertion Failed: Require [sum(outdeg) = " + str(np.sum(outdeg)) + "] == [" \
+                  + str(np.sum(indeg)) + " = sum(indeg)] == [ |E(G)| = " + str(len(g.get_edgelist())) + "]")
+    except igraph._igraph.InternalError as err:
+        print("igraph InternalError (likely invalid outdeg or indeg sequence): {0}".format(err))
+        print("out degree sequence: {}".format(outdeg.tolist()))
+        print("in degree sequence: {}".format(indeg.tolist()))
+        sys.exit()
+    except:
+        print("Unexpected error:", sys.exec_info()[0])
+        sys.exit()
+    else:
+        # get the adjacency matrix corresponding to the nodes of the graph
+        A = np.array(g.get_adjacency()._get_data())
+        if verbose:
+            # print(g)
+            # print(A)
+            # print("before resizing")
+            # print(A.shape)
+            print("row sum {}".format(np.sum(A, axis=1)))
+            print("column sum {}".format(np.sum(A, axis=0)))
+
+        # the generated matrix has nonzeros in bottom left with zeros 
+        # everywhere else, resize to it's m x N
+        A = A[N:m + N, 0:N]
+        # A = np.minimum(A, 1)
+
+        # check if the graph corresponds to a bipartite graph
+        check_bipartite = g.is_bipartite()
+
+        # save the row and column sums
+        row_sum = np.sum(A, axis=1)
+        col_sum = np.sum(A, axis=0)
+
+        # display properties of A and graph g
+        if verbose:
+            # print(A)
+            # print("after resizing")
+            # print(A.shape)
+            print("row sum {}".format(row_sum))
+            print("column sum {}".format(col_sum))
+            print("max row sum {}".format(max(row_sum)))
+            print("max column sum {}".format(max(col_sum)))
+            print("min row sum {}".format(min(row_sum)))
+            print("min column sum {}".format(min(col_sum)))
+            print("g is bipartite: {}".format(check_bipartite))
+
+        # set options and plot corresponding graph
+        if plotting:
+            layout = g.layout("auto")
+            color_dict = {1: "blue", 0: "red"}
+            g.vs['color'] = [color_dict[vertex_type] for vertex_type in g.vs['vertex_type']]
+            B = g.vs.select(vertex_type='B')
+            C = g.vs.select(vertex_type='C')
+            visual_style = {}
+            visual_style['vertex_size'] = 10
+            visual_style['layout'] = layout
+            visual_style['edge_width'] = 0.5
+            visual_style['edge_arrow_width'] = 0.2
+            visual_style['bbox'] = (1200, 1200)
+            igraph.drawing.plot(g, **visual_style)
+
+        # save data to a MATLAB ".mat" file
+        data_filename = run_ID + '_generate_groups_output.mat'
+        # if saving:
+        #     data = {}
+        #     # data['A'] = A
+        #     data['bipartite'] = check_bipartite
+        #     data['indeg'] = indeg
+        #     data['outdeg'] = outdeg
+        #     data['min_col_sum'] = min(col_sum)
+        #     data['min_row_sum'] = min(row_sum)
+        #     data['max_col_sum'] = max(col_sum)
+        #     data['max_row_sum'] = max(row_sum)
+        #     data['opts'] = opts
+        #     sio.savemat(opts['data_filename'], data)
+
+    # return the adjacency matrix of the graph
+    return A
+
+
+# main method for testing
+if __name__ == '__main__':
+
+    # print igraph version
+    print("Loaded igraph version {}".format(igraph.__version__))
+
+    # options for plotting, verbose output, saving, seed
+    opts = {}
+    opts['m'] = 100
+    opts['N'] = 500
+    opts['group_size'] = 30
+    opts['max_tests_per_individual'] = 15
+    opts['graph_gen_method'] = 'no_multiple'  # options are "no_multiple" or "simple"
+    opts['verbose'] = True  # False
+    opts['plotting'] = False  # False
+    opts['saving'] = True
+    opts['run_ID'] = 'GT_matrix_generation_component'
+    #opts['data_filename'] = opts['run_ID'] + '_generate_groups_output.mat'
+    opts['seed'] = 0
+
+    # generate the measurement matrix with igraph
+    A = gen_measurement_matrix(**opts)
+
+    # print shape of matrix
+    if opts['verbose']:
+        print("Generated adjacency matrix of size:")
+        print(A.shape)
diff --git a/build/lib/group_testing/generate_individual_status.py b/build/lib/group_testing/generate_individual_status.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Jun 25 13:45:50 2020
+
+@author: ndexter
+"""
+
+import sys, math
+from os import path
+import random
+import numpy as np
+#np.set_printoptions(threshold=np.inf)
+np.set_printoptions(edgeitems=60, linewidth=100000, 
+    formatter=dict(float=lambda x: "%.3g" % x))
+import scipy.io as sio
+
+"""
+Function to generate the infected status of individuals (a vector)
+"""
+def gen_status_vector(seed=0, N=1000, s=10, verbose=False):
+
+    # set the seed used for status generation
+    local_random = random.Random()
+    local_random.seed(seed)
+    np.random.seed(seed)
+
+    # generate a random vector having sparsity level s
+    indices = local_random.sample(range(N), s)
+    u = np.zeros((N, 1))
+    for i in indices:
+        u[i] = 1
+
+    try:
+        assert np.sum(u) == s
+    except AssertionError:
+        errstr = ("Assertion Failed: opts['s'] = " + str(s) \
+            + ", since sum(u) = " + str(np.sum(u))) 
+        print(errstr)
+        sys.exit()
+
+    return u
+
+if __name__ == '__main__':
+
+    # options for plotting, verbose output, saving, seed
+    opts = {}
+    opts['N'] = 500
+    opts['s'] = 20
+    opts['verbose'] = True #False
+    # opts['plotting'] = True #False
+    # opts['saving'] = True
+    # opts['run_ID'] = 'GT_status_vector_generation_component'
+    # opts['data_filename'] = opts['run_ID'] + '_generate_groups_output.mat'
+    opts['seed'] = 0
+
+    u = gen_status_vector(**opts)
+
+    # print shape of matrix
+    if opts['verbose']:
+        print("Generated status vector of size:")
+        print(u.shape)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,4 @@

		[console_scripts]
		GroupTesting = group_testing.group_testing:main
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		_program = "GroupTesting"
		__version__ = "0.1.0"