diff --git a/ntt_fromR.py b/ntt_fromR.py index 3128259..efea92b 100644 --- a/ntt_fromR.py +++ b/ntt_fromR.py @@ -62,7 +62,7 @@ 2037004893632210474603310977018868984748962115156511416403800420355825477294773422841921621, 2135970739633099406506331558604044839577416110609503442457036518698624890730242443329312596558002] -filepath = '/Users/maryam/Desktop/Research/RF_improvment/FinalCodes/testNTT.txt' +filepath = '~/testNTT.txt' vec = [] with open(filepath) as fp: line = fp.readline() @@ -87,5 +87,5 @@ conv.append(T) -with open('/Users/maryam/Desktop/Research/RF_improvment/FinalCodes/outNTT.txt', 'w') as outNTT: - outNTT.writelines("%s\n" % i for i in conv) \ No newline at end of file +with open('~/outNTT.txt', 'w') as outNTT: + outNTT.writelines("%s\n" % i for i in conv) diff --git a/rfdist/LICENSE b/rfdist/LICENSE new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/rfdist/LICENSE @@ -0,0 +1 @@ + diff --git a/rfdist/build/lib.linux-x86_64-2.7/rfdist/RF.py b/rfdist/build/lib.linux-x86_64-2.7/rfdist/RF.py new file mode 100644 index 0000000..3453a1d --- /dev/null +++ b/rfdist/build/lib.linux-x86_64-2.7/rfdist/RF.py @@ -0,0 +1,305 @@ +import time +import numpy as np +from numpy import transpose as t +from numpy import sum +from math import factorial + + +def Beta(m): + """ + Double factorial of odd numbers: a(m) = (2*m-1)!! = 1*3*5*...*(2*m-1) + :param m: integer + :return: Integer + """ + if m <= 0: + ans = 1 + else: + ans = 1 + for i in range(1, m): + ans *= 2 * i + 1 + return ans + + +def internaledges(tree): + """ + The function for computing the number of internal edges for each internal node + :param tree: An ETE tree + :return: Array of the number of internal edges for each internal node. + """ + label_nodes(tree) + ntip = len(tree.get_leaves()) + intedges = [0] * (ntip - 1) + edges = get_edges(tree) + for i in range(2 * ntip - 1, ntip, -1): + children = [] + for idx, edge in enumerate(edges): + if str(i) in edge[0].name: + children.append(idx) + child1 = edges[children[0]][1] + child2 = edges[children[1]][1] + child1num = int(child1.name[1:]) + child2num = int(child2.name[1:]) + if child1num <= ntip and child2num <= ntip: + intedges[i - ntip - 1] = 0 + elif child1num <= ntip < child2num: + intedges[i - ntip - 1] = intedges[child2num - ntip - 1] + 1 + elif child2num <= ntip < child1num: + intedges[i - ntip - 1] = intedges[child1num - ntip - 1] + 1 + else: + intedges[i - ntip - 1] = intedges[child2num - ntip - 1] + intedges[child1num - ntip - 1] + 2 + return intedges + + +def get_edges(tree): + """ + Get edges of the tree in the form of (node_start,node_end) + :param tree: ETE tree + :return: List of edges of a tree of a list of tuples (node_start,node_end) + """ + edges = [] + for node in tree.traverse("postorder"): + if not node.is_leaf(): + edges.append((node, node.children[0])) + edges.append((node, node.children[1])) + return edges + + +def label_nodes(tree): + """ + Label the internal nodes in the tree. + :param tree: an ETE tree. + :return: None + """ + i = len(tree.get_leaves()) + 1 + t = 1 + for node in tree.traverse("preorder"): + if not node.is_leaf(): + node.name = "n" + str(i) + i += 1 + else: + node.name = "t" + str(t) + t += 1 + + +def internalchildren(tree, v): + """ + This function computes the number of internal children of each node. v is the node. + :param tree: An ETE tree + :param v: Integer label of node + :return: Number of internal children for the node, v + """ + label_nodes(tree) + ntip = len(tree.get_leaves()) + edges = get_edges(tree) + children = [] + for idx, edge in enumerate(edges): + if str(v) in edge[0].name: + children.append(idx) + child1 = edges[children[0]][1] + child2 = edges[children[1]][1] + child1num = int(child1.name[1:]) + child2num = int(child2.name[1:]) + if child1num > ntip and child2num > ntip: + result = [2, child1num, child2num] + elif child1num > ntip >= child2num: + result = [1, child1num] + elif child2num > ntip >= child1num: + result = [1, child2num] + else: + result = [0] + return result + + +def RF(tree, n): + """ + Calculates Robinson-Foulds metric for an ETE tree. + :param tree: An ETE tree. + :param n: Number of tips on the tree rooted. + :return: 3 dimensional matrix + """ + + ntip = n - 1 + N = 0 + for node in tree.traverse("preorder"): + if not node.is_leaf(): + N += 1 + R = np.zeros((ntip - 1, ntip - 1, N)) + edges = internaledges(tree) + B = [0] * (n - 1) + for k in range(len(B)): + B[k] = Beta(k + 1) + for v in range(N - 1, -1, -1): + intchild = internalchildren(tree, v + ntip + 1) + intedges = edges[v] + if intchild[0] == 0: + R[v][0, 0] = 1 + elif intchild[0] == 1: + Rchild = R[intchild[1] - ntip - 1] + R[v][0][intedges] = 1 + R[v][1:ntip - 1, 0] = sum(t(Rchild[0:(ntip - 2), ] * B[0:(ntip - 1)]).T, axis=1) + R[v][1:(ntip - 1), 1:(ntip - 1)] = Rchild[1:(ntip - 1), 0:(ntip - 2)] + else: + Rchild1 = R[intchild[1] - ntip - 1] + Rchild2 = R[intchild[2] - ntip - 1] + + R[v][0, intedges] = 1 + R[v][2, 0] = sum(Rchild1[0,] * B[0:(ntip - 1)] * sum(Rchild2[0,] * B[0:(ntip - 1)])) + for s in range(3, (ntip - 1), 1): + R[v][s, 0] = sum(sum(Rchild1[0:(s - 1), ] * B[0:(ntip - 1)], axis=1) * sum( + np.flip(Rchild2, axis=0)[(len(Rchild2) - (s - 1)):len(Rchild2), ] * B[0:(ntip - 1)], axis=1)) + + sum1 = np.zeros((ntip - 2, ntip - 2)) + sum1[0, 0:(ntip - 2)] = sum(Rchild1[0,] * B[0:(ntip - 1)]) * Rchild2[0, 0:(ntip - 2)] + for s in range(2, ntip - 1): + temp = sum((sum(Rchild1[0:(s), ] * B[0:(ntip - 1)], axis=1) * np.flip(Rchild2, axis=0)[ + len(Rchild2) - (s):len(Rchild2), + 0:(ntip - 2)].T).T, axis=0) + sum1[s - 1, 0:(ntip - 2)] = temp + + sum2 = np.zeros((ntip - 2, ntip - 2)) + sum2[0, 0:(ntip - 2)] = sum(Rchild2[0,] * B[0:(ntip - 1)]) * Rchild1[0, 0:(ntip - 2)] + for s in range(2, (ntip - 1)): + temp = sum((sum(Rchild2[0:(s), ] * B[0:(ntip - 1)], axis=1) * np.flip(Rchild1, axis=0)[ + len(Rchild1) - (s):len(Rchild1), + 0:(ntip - 2)].T).T, axis=0) + sum2[s - 1][0:(ntip - 2)] = temp + + sum3 = np.zeros((ntip - 2, ntip - 2)) + for s in range(0, (ntip - 2)): + for k in range(1, (ntip - 2)): + total3 = 0 + for s1 in range(-1, s + 1): + for k1 in range(-1, (k - 1)): + total3 += Rchild1[s1 + 1][k1 + 1] * Rchild2[s - s1][k - 2 - k1] + sum3[s, k] = total3 + R[v][1:(ntip - 1), 1:(ntip - 1)] = sum1 + sum2 + sum3 + return R + + +def RF_sum3_performance(tree, n): + """ + Calculates Robinson-Foulds metric for an ETE tree and records time of the sum3 section. + :param tree: An ETE tree. + :param n: Number of tips on the tree rooted. + :return: 3 dimensional matrix, sum3 performances in 1 dimensional array of seconds + """ + sum3_dt_list = [] + ntip = n - 1 + N = 0 + for node in tree.traverse("preorder"): + if not node.is_leaf(): + N += 1 + R = np.zeros((ntip - 1, ntip - 1, N)) + edges = internaledges(tree) + B = [0] * (n - 1) + for k in range(len(B)): + B[k] = Beta(k + 1) + for v in range(N - 1, -1, -1): + intchild = internalchildren(tree, v + ntip + 1) + intedges = edges[v] + if intchild[0] == 0: + R[v][0, 0] = 1 + elif intchild[0] == 1: + Rchild = R[intchild[1] - ntip - 1] + R[v][0][intedges] = 1 + R[v][1:ntip - 1, 0] = sum(t(Rchild[0:(ntip - 2), ] * B[0:(ntip - 1)]).T, axis=1) + R[v][1:(ntip - 1), 1:(ntip - 1)] = Rchild[1:(ntip - 1), 0:(ntip - 2)] + else: + Rchild1 = R[intchild[1] - ntip - 1] + Rchild2 = R[intchild[2] - ntip - 1] + + R[v][0, intedges] = 1 + R[v][2, 0] = sum(Rchild1[0,] * B[0:(ntip - 1)] * sum(Rchild2[0,] * B[0:(ntip - 1)])) + for s in range(3, (ntip - 1), 1): + R[v][s, 0] = sum(sum(Rchild1[0:(s - 1), ] * B[0:(ntip - 1)], axis=1) * sum( + np.flip(Rchild2, axis=0)[(len(Rchild2) - (s - 1)):len(Rchild2), ] * B[0:(ntip - 1)], axis=1)) + + sum1 = np.zeros((ntip - 2, ntip - 2)) + sum1[0, 0:(ntip - 2)] = sum(Rchild1[0,] * B[0:(ntip - 1)]) * Rchild2[0, 0:(ntip - 2)] + for s in range(2, ntip - 1): + temp = sum((sum(Rchild1[0:(s), ] * B[0:(ntip - 1)], axis=1) * np.flip(Rchild2, axis=0)[ + len(Rchild2) - (s):len(Rchild2), + 0:(ntip - 2)].T).T, axis=0) + sum1[s - 1, 0:(ntip - 2)] = temp + + sum2 = np.zeros((ntip - 2, ntip - 2)) + sum2[0, 0:(ntip - 2)] = sum(Rchild2[0,] * B[0:(ntip - 1)]) * Rchild1[0, 0:(ntip - 2)] + for s in range(2, (ntip - 1)): + temp = sum((sum(Rchild2[0:(s), ] * B[0:(ntip - 1)], axis=1) * np.flip(Rchild1, axis=0)[ + len(Rchild1) - (s):len(Rchild1), + 0:(ntip - 2)].T).T, axis=0) + sum2[s - 1][0:(ntip - 2)] = temp + + sum3_t0 = time.time() + sum3 = np.zeros((ntip - 2, ntip - 2)) + for s in range(0, (ntip - 2)): + for k in range(1, (ntip - 2)): + total3 = 0 + for s1 in range(-1, s + 1): + for k1 in range(-1, (k - 1)): + total3 += Rchild1[s1 + 1][k1 + 1] * Rchild2[s - s1][k - 2 - k1] + sum3[s, k] = total3 + sum3_tf = time.time() + sum3_dt = sum3_tf - sum3_t0 + sum3_dt_list.append(sum3_dt) + R[v][1:(ntip - 1), 1:(ntip - 1)] = sum1 + sum2 + sum3 + return R, sum3_dt_list + + +def RsT(R, n, s): + """ + + :param R: 3 dimensional matrix + :param n: integer + :param s: integer + :return: integer + """ + B = [] + for k in range(0, (n - 2)): + B.append(Beta(k + 1)) + # print(t(R[0][s,0:(n - 2 - s)])) + rst = sum(t(t(R[0][s, 0:(n - 2 - s)]) * B[0:(n - 2 - s)])) + return rst + + +def qmT(R, n, m): + """ + + :param R: 3 dimensional matrix + :param n: integer + :param m: integer + :return: integer + """ + qmt = 0 + for s in range(m, (n - 2)): + rst = RsT(R, n, s) + qmt = qmt + (factorial(s) / (factorial(m) * factorial(s - m))) * rst * pow((-1), (s - m)) + return qmt + + +def polynomial(tree, n): + """ + + :param tree: ETE tree + :param n: Number of tips on the ETE tree rooted + :return: 1 dimensional array + """ + Coef = [] + R = RF(tree, n) + for i in range(0, 2 * (n - 2), 2): + Coef.append(qmT(R, n, n - 3 - (i // 2))) + return Coef + + +def polynomial_sum3_performance(tree, n): + """ + + :param tree: ETE tree + :param n: Number of tips on the ETE tree rooted + :return: 1 dimensional array, sum3 performance in 1 dimensional array in seconds + """ + Coef = [] + R, sum3_dt_list = RF_sum3_performance(tree, n) + for i in range(0, 2 * (n - 2), 2): + Coef.append(qmT(R, n, n - 3 - (i // 2))) + return Coef, sum(sum3_dt_list) diff --git a/rfdist/build/lib.linux-x86_64-2.7/rfdist/RF_NTT.py b/rfdist/build/lib.linux-x86_64-2.7/rfdist/RF_NTT.py new file mode 100644 index 0000000..5d703b5 --- /dev/null +++ b/rfdist/build/lib.linux-x86_64-2.7/rfdist/RF_NTT.py @@ -0,0 +1,440 @@ +import time +import numpy as np +from numpy import transpose as t +from numpy import sum +from math import factorial +import math +#import primes +import ntt + +Len=[8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768,65536,131072] +Primes=[113,115792089237316195423570985008687907853269984665640564039457584007913129637873, + 115792089237316195423570985008687907853269984665640564039457584007913129636801, + 115792089237316195423570985008687907853269984665640564039457584007913129636801, + 115792089237316195423570985008687907853269984665640564039457584007913129635457, + 115792089237316195423570985008687907853269984665640564039457584007913129607681, + 115792089237316195423570985008687907853269984665640564039457584007913129607681, + 1606938044258990275541962092341162602522202993782792835297281, + 115792089237316195423570985008687907853269984665640564039457584007913129461761, + 115792089237316195423570985008687907853269984665640564039457584007913129172993, + 115792089237316195423570985008687907853269984665640564039457584007913129172993, + 115792089237316195423570985008687907853269984665640564039457584007913126920193, + 2037035976334486086268445688409378161051468393665936250636140449354381299763336706178908161, + 2037035976334486086268445688409378161051468393665936250636140449354381299763336706177892353, + 2135987035920910082395021706169552114602704522356652769947041607822219725780640550022962063736833] +G=[3,21,21,7,14,14,17,3,5,5,5,7,3,3,5] +W=[18,111631467676984398667335374000770145103933448276474029826861642673272833963082, + 61564226408844285440839285786349609218130544814944526994086477601177658749278, + 81935540147743287569595753552990576696362777472267730857667504939936024849618, + 91170141105073899547981930648308993603853306285937430424976477296731306054334, + 15708606027314919182172787703297741273850535825986009804848210667018260880261, + 33237221508024116229589036146826233600929365121793224404567006713723969953911, + 1434356156435145496244722803431900791032286202821828147418984, + 63540838781496461220816358160078141041184179050497342724817951168620586697763, + 15509384003381570599629987132205606235150174359826996727314588125855310328659, + 25133251670451185231426344419964958947927332720991278511886330943333482712954, + 48268024184090406204552669607243172351613601631957894966972042240077595762455, + 1981954346943524807557239944865006706282615432034635354549839215713305664395133015635456087, + 255453916006749145705554152189433435708896877084778108205342427278941085195694645573514698, + 1112225229508435524512344497914210498809987905889897633818900952286269329043189880863555612895717] +Inv_W=[44,95491701939787783842804801121993865397654678743734107460462393771310698665647, + 14352436843288370756829869005573506932522362276688548257485320352002835319209, + 62892232488306180704111955726624356283223010632387729863360038676442428890163, + 74617868801387775053539490766464698026550495075777446366514898818780219762498, + 13868795121166213445530776440016103808108592816062879198885163286366532864581, + 110859567440002879494961048339626890678895648700881402044340171696425633801612, + 1203745758939828332217036953616990186340140894893888367743845, + 90289433802154956706057573109440369585442036636363262555556580120598543329883, + 21861149690056766619999951991678670952283712223678074390943578332757181356925, + 49072410160240151341757778422903690290128844704330943680519698627182085130938, + 20156023685761413501013067524878709675678100528950462782526586000840609710652, + 876083208576290984072439742436788642271124879220068514346599382871681217068777930632586646, + 1474657800099070887974174855844346785758665342966052193836639675246968569796074002531960855, + 1525680669204487905316664361660865987506438064471172450598417789819020549260814728309162207838676] + +Inv_L=[99,108555083659983933209597798445644913612440610624038028786991485007418559035506, + 112173586448650064316584391727166410732855297644839296413224534507665844335651, + 113982837842983129870077688367927159293062641155239930226341059257789486986226, + 114887463540149662646824336688307533573166312910440247132899321632851308310180, + 115339776388732929035197660848497720713218148788040405586178452820382218945151, + 115565932813024562229384322928592814283244066726840484812818018414147674276416, + 1605368768825143605351003144985360685918177404921676826669061, + 115735550131243287125024319488664134460763505180940544232797692609471765629016, + 115763819684279741274297652248676021157016744923290554136127638308692447256691, + 115777954460797968348934318628681964505143364794465559087792611158302788214842, + 115785021849057081886252651818684936179206674730053061563625097583107956441255, + 2036973810929934862938176265628359808446455836647086582171460391357269654826210139506966666, + 2037004893632210474603310977018868984748962115156511416403800420355825477294773422841921621, + 2135970739633099406506331558604044839577416110609503442457036518698624890730242443329312596558002] + +def Beta(m): + """ + Double factorial of odd numbers: a(m) = (2*m-1)!! = 1*3*5*...*(2*m-1) + :param m: integer + :return: Integer + """ + if m <= 0: + ans = 1 + else: + ans = 1 + for i in range(1, m): + ans *= 2 * i + 1 + return ans + + +def internaledges(tree): + """ + The function for computing the number of internal edges for each internal node + :param tree: An ETE tree + :return: Array of the number of internal edges for each internal node. + """ + label_nodes(tree) + ntip = len(tree.get_leaves()) + intedges = [0] * (ntip - 1) + edges = get_edges(tree) + for i in range(2 * ntip - 1, ntip, -1): + children = [] + for idx, edge in enumerate(edges): + if str(i) in edge[0].name: + children.append(idx) + child1 = edges[children[0]][1] + child2 = edges[children[1]][1] + child1num = int(child1.name[1:]) + child2num = int(child2.name[1:]) + if child1num <= ntip and child2num <= ntip: + intedges[i - ntip - 1] = 0 + elif child1num <= ntip < child2num: + intedges[i - ntip - 1] = intedges[child2num - ntip - 1] + 1 + elif child2num <= ntip < child1num: + intedges[i - ntip - 1] = intedges[child1num - ntip - 1] + 1 + else: + intedges[i - ntip - 1] = intedges[child2num - ntip - 1] + intedges[child1num - ntip - 1] + 2 + return intedges + + +def get_edges(tree): + """ + Get edges of the tree in the form of (node_start,node_end) + :param tree: ETE tree + :return: List of edges of a tree of a list of tuples (node_start,node_end) + """ + edges = [] + for node in tree.traverse("postorder"): + if not node.is_leaf(): + edges.append((node, node.children[0])) + edges.append((node, node.children[1])) + return edges + + +def label_nodes(tree): + """ + Label the internal nodes in the tree. + :param tree: an ETE tree. + :return: None + """ + i = len(tree.get_leaves()) + 1 + t = 1 + for node in tree.traverse("preorder"): + if not node.is_leaf(): + node.name = "n" + str(i) + i += 1 + else: + node.name = "t" + str(t) + t += 1 + + +def internalchildren(tree, v): + """ + This function computes the number of internal children of each node. v is the node. + :param tree: An ETE tree + :param v: Integer label of node + :return: Number of internal children for the node, v + """ + label_nodes(tree) + ntip = len(tree.get_leaves()) + edges = get_edges(tree) + children = [] + for idx, edge in enumerate(edges): + if str(v) in edge[0].name: + children.append(idx) + child1 = edges[children[0]][1] + child2 = edges[children[1]][1] + child1num = int(child1.name[1:]) + child2num = int(child2.name[1:]) + if child1num > ntip and child2num > ntip: + result = [2, child1num, child2num] + elif child1num > ntip >= child2num: + result = [1, child1num] + elif child2num > ntip >= child1num: + result = [1, child2num] + else: + result = [0] + return result + + +def RF_NTT(tree, n): + """ + Calculates Robinson-Foulds metric for an ETE tree. + :param tree: An ETE tree. + :param n: Number of tips on the tree rooted. + :return: 3 dimensional matrix + """ + + ntip = n - 1 + N = 0 + hp=(n-4)*(n-2)*3 + L= 2**(math.ceil(math.log(hp,10)/math.log(2,10))) + + + for node in tree.traverse("preorder"): + if not node.is_leaf(): + N += 1 + R = np.zeros((ntip - 1, ntip - 1, N)) + edges = internaledges(tree) + B = [0] * (n - 1) + for k in range(len(B)): + B[k] = Beta(k + 1) + for v in range(N - 1, -1, -1): + intchild = internalchildren(tree, v + ntip + 1) + intedges = edges[v] + if intchild[0] == 0: + R[v][0, 0] = 1 + elif intchild[0] == 1: + Rchild = R[intchild[1] - ntip - 1] + R[v][0][intedges] = 1 + R[v][1:ntip - 1, 0] = sum(np.transpose(Rchild[0:(ntip - 2), ] * B[0:(ntip - 1)]).T, axis=1) + R[v][1:(ntip - 1), 1:(ntip - 1)] = Rchild[1:(ntip - 1), 0:(ntip - 2)] + else: + Rchild1 = R[intchild[1] - ntip - 1] + Rchild2 = R[intchild[2] - ntip - 1] + + R[v][0, intedges] = 1 + R[v][2, 0] = sum(Rchild1[0,] * B[0:(ntip - 1)] * sum(Rchild2[0,] * B[0:(ntip - 1)])) + for s in range(3, (ntip - 1), 1): + R[v][s, 0] = sum(sum(Rchild1[0:(s - 1), ] * B[0:(ntip - 1)], axis=1) * sum( + np.flip(Rchild2, axis=0)[(len(Rchild2) - (s - 1)):len(Rchild2), ] * B[0:(ntip - 1)], axis=1)) + + sum1 = np.zeros((ntip - 2, ntip - 2)) + sum1[0, 0:(ntip - 2)] = sum(Rchild1[0,] * B[0:(ntip - 1)]) * Rchild2[0, 0:(ntip - 2)] + for s in range(2, ntip - 1): + temp = sum((sum(Rchild1[0:(s), ] * B[0:(ntip - 1)], axis=1) * np.flip(Rchild2, axis=0)[ + len(Rchild2) - (s):len(Rchild2), + 0:(ntip - 2)].T).T, axis=0) + sum1[s - 1, 0:(ntip - 2)] = temp + + sum2 = np.zeros((ntip - 2, ntip - 2)) + sum2[0, 0:(ntip - 2)] = sum(Rchild2[0,] * B[0:(ntip - 1)]) * Rchild1[0, 0:(ntip - 2)] + for s in range(2, (ntip - 1)): + temp = sum((sum(Rchild2[0:(s), ] * B[0:(ntip - 1)], axis=1) * np.flip(Rchild1, axis=0)[ + len(Rchild1) - (s):len(Rchild1), + 0:(ntip - 2)].T).T, axis=0) + sum2[s - 1][0:(ntip - 2)] = temp + + R1 = Rchild1[0:(ntip - 1), 0:(ntip - 3)] + R1aug = np.zeros(L) + t = 0 + for i in range(R1.shape[1]): + R1aug[t:(t + R1.shape[0])] = R1[:, i] + t = t + (3 * R1.shape[0]) + + R2 = Rchild2[0:(ntip - 1), 0:(ntip - 3)] + R2aug = np.zeros(L) + t = 0 + for i in range(R2.shape[1]): + R2aug[t:(t + R2.shape[0])] = R2[:, i] + t = t + (3 * R2.shape[0]) + R1aug = R1aug.tolist() + R2aug = R2aug.tolist() + + ind = Len.index(L) + p = Primes[ind] + w = W[ind] + inv_w = Inv_W[ind] + inv_L = Inv_L[ind] + + R1aug = [int(x) for x in R1aug] + R2aug = [int(x) for x in R2aug] + + #start = time.time() + Ivec1 = ntt.transform_fast(R1aug, w, p) + Ivec2 = ntt.transform_fast(R2aug, w, p) + Ivec3 = [(x * y) % p for (x, y) in zip(Ivec1, Ivec2)] + conv = ntt.inverse_transform(Ivec3, inv_w, inv_L, p) + pad = math.ceil(len(R1aug) / (3 * R1.shape[0])) + conv_pad = [0] * (pad * 3 * R1.shape[0] - len(R1aug)) + sum3 = np.array(conv + conv_pad) + #sum3 = np.array(sum3) + nrow = 3 * R1.shape[0] + ncol = len(sum3) // nrow + sum3 = np.transpose(np.reshape(sum3, (ncol, nrow)))[0:R1.shape[0], 0:R1.shape[1]][1:R1.shape[0], ] + #sum3 = np.transpose(sum3) + #sum3 = sum3[0:R1.shape[0], 0:R1.shape[1]] + #sum3 = sum3[1:R1.shape[0], ] + z = np.zeros((R1.shape[0] - 1, 1)) + sum3 = np.concatenate((z, sum3), axis=1) + #sum3_tf = time.time() + #sum3_dt = sum3_tf - sum3_t0 + + R[v][1:(ntip - 1), 1:(ntip - 1)] = sum1 + sum2 + sum3 + return R + + +def RsT(R, n, s): + """ + + :param R: 3 dimensional matrix + :param n: integer + :param s: integer + :return: integer + """ + B = [] + for k in range(0, (n - 2)): + B.append(Beta(k + 1)) + # print(t(R[0][s,0:(n - 2 - s)])) + rst = sum(t(t(R[0][s, 0:(n - 2 - s)]) * B[0:(n - 2 - s)])) + return rst + + +def qmT(R, n, m): + """ + + :param R: 3 dimensional matrix + :param n: integer + :param m: integer + :return: integer + """ + qmt = 0 + for s in range(m, (n - 2)): + rst = RsT(R, n, s) + qmt = qmt + (factorial(s) / (factorial(m) * factorial(s - m))) * rst * pow((-1), (s - m)) + return qmt + +def RF_NTT_sum3_performance(tree, n): + """ + Calculates Robinson-Foulds metric for an ETE tree and records time of the sum3 section. + :param tree: An ETE tree. + :param n: Number of tips on the tree rooted. + :return: 3 dimensional matrix, sum3 performances in 1 dimensional array of seconds + """ + sum3_dt_list = [] + ntip = n - 1 + N = 0 + hp = (n - 4) * (n - 2) * 3 + L = 2 ** (math.ceil(math.log(hp, 10) / math.log(2, 10))) + + for node in tree.traverse("preorder"): + if not node.is_leaf(): + N += 1 + R = np.zeros((ntip - 1, ntip - 1, N)) + edges = internaledges(tree) + B = [0] * (n - 1) + for k in range(len(B)): + B[k] = Beta(k + 1) + for v in range(N - 1, -1, -1): + intchild = internalchildren(tree, v + ntip + 1) + intedges = edges[v] + if intchild[0] == 0: + R[v][0, 0] = 1 + elif intchild[0] == 1: + Rchild = R[intchild[1] - ntip - 1] + R[v][0][intedges] = 1 + R[v][1:ntip - 1, 0] = sum(np.transpose(Rchild[0:(ntip - 2), ] * B[0:(ntip - 1)]).T, axis=1) + R[v][1:(ntip - 1), 1:(ntip - 1)] = Rchild[1:(ntip - 1), 0:(ntip - 2)] + else: + Rchild1 = R[intchild[1] - ntip - 1] + Rchild2 = R[intchild[2] - ntip - 1] + + R[v][0, intedges] = 1 + R[v][2, 0] = sum(Rchild1[0,] * B[0:(ntip - 1)] * sum(Rchild2[0,] * B[0:(ntip - 1)])) + for s in range(3, (ntip - 1), 1): + R[v][s, 0] = sum(sum(Rchild1[0:(s - 1), ] * B[0:(ntip - 1)], axis=1) * sum( + np.flip(Rchild2, axis=0)[(len(Rchild2) - (s - 1)):len(Rchild2), ] * B[0:(ntip - 1)], axis=1)) + + sum1 = np.zeros((ntip - 2, ntip - 2)) + sum1[0, 0:(ntip - 2)] = sum(Rchild1[0,] * B[0:(ntip - 1)]) * Rchild2[0, 0:(ntip - 2)] + for s in range(2, ntip - 1): + temp = sum((sum(Rchild1[0:(s), ] * B[0:(ntip - 1)], axis=1) * np.flip(Rchild2, axis=0)[ + len(Rchild2) - (s):len(Rchild2), + 0:(ntip - 2)].T).T, axis=0) + sum1[s - 1, 0:(ntip - 2)] = temp + + sum2 = np.zeros((ntip - 2, ntip - 2)) + sum2[0, 0:(ntip - 2)] = sum(Rchild2[0,] * B[0:(ntip - 1)]) * Rchild1[0, 0:(ntip - 2)] + for s in range(2, (ntip - 1)): + temp = sum((sum(Rchild2[0:(s), ] * B[0:(ntip - 1)], axis=1) * np.flip(Rchild1, axis=0)[ + len(Rchild1) - (s):len(Rchild1), + 0:(ntip - 2)].T).T, axis=0) + sum2[s - 1][0:(ntip - 2)] = temp + + R1 = Rchild1[0:(ntip - 1), 0:(ntip - 3)] + R1aug = np.zeros(L) + t = 0 + for i in range(R1.shape[1]): + R1aug[t:(t + R1.shape[0])] = R1[:, i] + t = t + (3 * R1.shape[0]) + + R2 = Rchild2[0:(ntip - 1), 0:(ntip - 3)] + R2aug = np.zeros(L) + t = 0 + for i in range(R2.shape[1]): + R2aug[t:(t + R2.shape[0])] = R2[:, i] + t = t + (3 * R2.shape[0]) + R1aug = R1aug.tolist() + R2aug = R2aug.tolist() + + ind = Len.index(L) + p = Primes[ind] + w = W[ind] + inv_w = Inv_W[ind] + inv_L = Inv_L[ind] + + R1aug = [int(x) for x in R1aug] + R2aug = [int(x) for x in R2aug] + + sum3_t0 = time.time() + Ivec1 = ntt.transform_fast(R1aug, w, p) + Ivec2 = ntt.transform_fast(R2aug, w, p) + Ivec3 = [(x * y) % p for (x, y) in zip(Ivec1, Ivec2)] + conv = ntt.inverse_transform(Ivec3, inv_w, inv_L, p) + pad = math.ceil(len(R1aug) / (3 * R1.shape[0])) + conv_pad = [0] * (pad * 3 * R1.shape[0] - len(R1aug)) + sum3 = np.array(conv + conv_pad) + # sum3 = np.array(sum3) + nrow = 3 * R1.shape[0] + ncol = len(sum3) // nrow + sum3 = np.transpose(np.reshape(sum3, (ncol, nrow)))[0:R1.shape[0], 0:R1.shape[1]][1:R1.shape[0], ] + z = np.zeros((R1.shape[0] - 1, 1)) + sum3 = np.concatenate((z, sum3), axis=1) + sum3_tf = time.time() + sum3_dt = sum3_tf - sum3_t0 + sum3_dt_list.append(sum3_dt) + R[v][1:(ntip - 1), 1:(ntip - 1)] = sum1 + sum2 + sum3 + return R, sum3_dt_list + + +def polynomial(tree, n): + """ + + :param tree: ETE tree + :param n: Number of tips on the ETE tree rooted + :return: 1 dimensional array + """ + Coef = [] + R = RF_NTT(tree, n) + for i in range(0, 2 * (n - 2), 2): + Coef.append(qmT(R, n, n - 3 - (i // 2))) + return Coef + + +def polynomial_sum3_performance(tree, n): + """ + + :param tree: ETE tree + :param n: Number of tips on the ETE tree rooted + :return: 1 dimensional array, sum3 performance in 1 dimensional array in seconds + """ + Coef = [] + R, sum3_dt_list = RF_NTT_sum3_performance(tree, n) + for i in range(0, 2 * (n - 2), 2): + Coef.append(qmT(R, n, n - 3 - (i // 2))) + return Coef, sum(sum3_dt_list) \ No newline at end of file diff --git a/rfdist/build/lib.linux-x86_64-2.7/rfdist/__init__.py b/rfdist/build/lib.linux-x86_64-2.7/rfdist/__init__.py new file mode 100644 index 0000000..9714d64 --- /dev/null +++ b/rfdist/build/lib.linux-x86_64-2.7/rfdist/__init__.py @@ -0,0 +1,8 @@ +from .RF import internalchildren, internaledges, label_nodes, get_edges, RF, RsT, qmT, polynomial, Beta, \ + polynomial_sum3_performance, RF_sum3_performance +from .ntt import transform_fast, convolve_ntt, inverse_transform +from .RF_NTT import Len, Primes, G, W, Inv_W, Inv_L, RF_NTT, polynomial +name = "rfdist" +__all__ = ['RF', 'internalchildren', 'internaledges', 'label_nodes', 'get_edges', 'RsT', 'qmT', 'polynomial', 'Beta', + 'polynomial_sum3_performance', 'RF_sum3_performance', 'transform_fast', 'convolve_ntt', 'inverse_transform', + 'Len', 'Primes', 'G', 'W', 'Inv_W', 'Inv_L', 'RF_NTT', 'polynomial'] diff --git a/rfdist/build/lib.linux-x86_64-2.7/rfdist/ntt.py b/rfdist/build/lib.linux-x86_64-2.7/rfdist/ntt.py new file mode 100644 index 0000000..4260c7c --- /dev/null +++ b/rfdist/build/lib.linux-x86_64-2.7/rfdist/ntt.py @@ -0,0 +1,60 @@ +# Computes the forward number-theoretic transform of the given vector, +# with respect to the given primitive nth root of unity under the given modulus. +# The length of the vector must be a power of 2. + +def transform_fast(vector, root, mod): + transform_vec = vector + n = len(transform_vec) + levels = n.bit_length() - 1 + if 1 << levels != n: + raise ValueError("Length is not a power of 2") + + powtable = [] + temp = 1 + for i in range(n // 2): + powtable.append(temp) + temp = temp * root % mod + + def reverse(x, bits): + y = 0 + for i in range(bits): + y = (y << 1) | (x & 1) + x >>= 1 + return y + + for i in range(n): + j = reverse(i, levels) + if j > i: + transform_vec[i], transform_vec[j] = transform_vec[j], transform_vec[i] + size = 2 + while size <= n: + halfsize = size // 2 + tablestep = n // size + for i in range(0, n, size): + k = 0 + for j in range(i, i + halfsize): + l = j + halfsize + left = transform_vec[j] + right = transform_vec[l] * powtable[k] + transform_vec[j] = (left + right) % mod + transform_vec[l] = (left - right) % mod + k += tablestep + size *= 2 + return (transform_vec) + + +# Compute the inverse of ntt + +def inverse_transform(vector, inv_root, inv_L, mod): + outvec = transform_fast(vector, inv_root, mod) + return [(val * inv_L % mod) for val in outvec] + + +# Compute the convolution using ntt + +def convolve_ntt(vec1, vec2, root, inv_root, inv_L, mod): + temp1 = transform_fast(vec1, root, mod) + temp2 = transform_fast(vec2, root, mod) + temp3 = [(x * y % mod) for (x, y) in zip(temp1, temp2)] + conv = inverse_transform(temp3, inv_root, inv_L, mod) + return conv \ No newline at end of file diff --git a/rfdist/build/lib/rfdist/RF.py b/rfdist/build/lib/rfdist/RF.py new file mode 100644 index 0000000..3453a1d --- /dev/null +++ b/rfdist/build/lib/rfdist/RF.py @@ -0,0 +1,305 @@ +import time +import numpy as np +from numpy import transpose as t +from numpy import sum +from math import factorial + + +def Beta(m): + """ + Double factorial of odd numbers: a(m) = (2*m-1)!! = 1*3*5*...*(2*m-1) + :param m: integer + :return: Integer + """ + if m <= 0: + ans = 1 + else: + ans = 1 + for i in range(1, m): + ans *= 2 * i + 1 + return ans + + +def internaledges(tree): + """ + The function for computing the number of internal edges for each internal node + :param tree: An ETE tree + :return: Array of the number of internal edges for each internal node. + """ + label_nodes(tree) + ntip = len(tree.get_leaves()) + intedges = [0] * (ntip - 1) + edges = get_edges(tree) + for i in range(2 * ntip - 1, ntip, -1): + children = [] + for idx, edge in enumerate(edges): + if str(i) in edge[0].name: + children.append(idx) + child1 = edges[children[0]][1] + child2 = edges[children[1]][1] + child1num = int(child1.name[1:]) + child2num = int(child2.name[1:]) + if child1num <= ntip and child2num <= ntip: + intedges[i - ntip - 1] = 0 + elif child1num <= ntip < child2num: + intedges[i - ntip - 1] = intedges[child2num - ntip - 1] + 1 + elif child2num <= ntip < child1num: + intedges[i - ntip - 1] = intedges[child1num - ntip - 1] + 1 + else: + intedges[i - ntip - 1] = intedges[child2num - ntip - 1] + intedges[child1num - ntip - 1] + 2 + return intedges + + +def get_edges(tree): + """ + Get edges of the tree in the form of (node_start,node_end) + :param tree: ETE tree + :return: List of edges of a tree of a list of tuples (node_start,node_end) + """ + edges = [] + for node in tree.traverse("postorder"): + if not node.is_leaf(): + edges.append((node, node.children[0])) + edges.append((node, node.children[1])) + return edges + + +def label_nodes(tree): + """ + Label the internal nodes in the tree. + :param tree: an ETE tree. + :return: None + """ + i = len(tree.get_leaves()) + 1 + t = 1 + for node in tree.traverse("preorder"): + if not node.is_leaf(): + node.name = "n" + str(i) + i += 1 + else: + node.name = "t" + str(t) + t += 1 + + +def internalchildren(tree, v): + """ + This function computes the number of internal children of each node. v is the node. + :param tree: An ETE tree + :param v: Integer label of node + :return: Number of internal children for the node, v + """ + label_nodes(tree) + ntip = len(tree.get_leaves()) + edges = get_edges(tree) + children = [] + for idx, edge in enumerate(edges): + if str(v) in edge[0].name: + children.append(idx) + child1 = edges[children[0]][1] + child2 = edges[children[1]][1] + child1num = int(child1.name[1:]) + child2num = int(child2.name[1:]) + if child1num > ntip and child2num > ntip: + result = [2, child1num, child2num] + elif child1num > ntip >= child2num: + result = [1, child1num] + elif child2num > ntip >= child1num: + result = [1, child2num] + else: + result = [0] + return result + + +def RF(tree, n): + """ + Calculates Robinson-Foulds metric for an ETE tree. + :param tree: An ETE tree. + :param n: Number of tips on the tree rooted. + :return: 3 dimensional matrix + """ + + ntip = n - 1 + N = 0 + for node in tree.traverse("preorder"): + if not node.is_leaf(): + N += 1 + R = np.zeros((ntip - 1, ntip - 1, N)) + edges = internaledges(tree) + B = [0] * (n - 1) + for k in range(len(B)): + B[k] = Beta(k + 1) + for v in range(N - 1, -1, -1): + intchild = internalchildren(tree, v + ntip + 1) + intedges = edges[v] + if intchild[0] == 0: + R[v][0, 0] = 1 + elif intchild[0] == 1: + Rchild = R[intchild[1] - ntip - 1] + R[v][0][intedges] = 1 + R[v][1:ntip - 1, 0] = sum(t(Rchild[0:(ntip - 2), ] * B[0:(ntip - 1)]).T, axis=1) + R[v][1:(ntip - 1), 1:(ntip - 1)] = Rchild[1:(ntip - 1), 0:(ntip - 2)] + else: + Rchild1 = R[intchild[1] - ntip - 1] + Rchild2 = R[intchild[2] - ntip - 1] + + R[v][0, intedges] = 1 + R[v][2, 0] = sum(Rchild1[0,] * B[0:(ntip - 1)] * sum(Rchild2[0,] * B[0:(ntip - 1)])) + for s in range(3, (ntip - 1), 1): + R[v][s, 0] = sum(sum(Rchild1[0:(s - 1), ] * B[0:(ntip - 1)], axis=1) * sum( + np.flip(Rchild2, axis=0)[(len(Rchild2) - (s - 1)):len(Rchild2), ] * B[0:(ntip - 1)], axis=1)) + + sum1 = np.zeros((ntip - 2, ntip - 2)) + sum1[0, 0:(ntip - 2)] = sum(Rchild1[0,] * B[0:(ntip - 1)]) * Rchild2[0, 0:(ntip - 2)] + for s in range(2, ntip - 1): + temp = sum((sum(Rchild1[0:(s), ] * B[0:(ntip - 1)], axis=1) * np.flip(Rchild2, axis=0)[ + len(Rchild2) - (s):len(Rchild2), + 0:(ntip - 2)].T).T, axis=0) + sum1[s - 1, 0:(ntip - 2)] = temp + + sum2 = np.zeros((ntip - 2, ntip - 2)) + sum2[0, 0:(ntip - 2)] = sum(Rchild2[0,] * B[0:(ntip - 1)]) * Rchild1[0, 0:(ntip - 2)] + for s in range(2, (ntip - 1)): + temp = sum((sum(Rchild2[0:(s), ] * B[0:(ntip - 1)], axis=1) * np.flip(Rchild1, axis=0)[ + len(Rchild1) - (s):len(Rchild1), + 0:(ntip - 2)].T).T, axis=0) + sum2[s - 1][0:(ntip - 2)] = temp + + sum3 = np.zeros((ntip - 2, ntip - 2)) + for s in range(0, (ntip - 2)): + for k in range(1, (ntip - 2)): + total3 = 0 + for s1 in range(-1, s + 1): + for k1 in range(-1, (k - 1)): + total3 += Rchild1[s1 + 1][k1 + 1] * Rchild2[s - s1][k - 2 - k1] + sum3[s, k] = total3 + R[v][1:(ntip - 1), 1:(ntip - 1)] = sum1 + sum2 + sum3 + return R + + +def RF_sum3_performance(tree, n): + """ + Calculates Robinson-Foulds metric for an ETE tree and records time of the sum3 section. + :param tree: An ETE tree. + :param n: Number of tips on the tree rooted. + :return: 3 dimensional matrix, sum3 performances in 1 dimensional array of seconds + """ + sum3_dt_list = [] + ntip = n - 1 + N = 0 + for node in tree.traverse("preorder"): + if not node.is_leaf(): + N += 1 + R = np.zeros((ntip - 1, ntip - 1, N)) + edges = internaledges(tree) + B = [0] * (n - 1) + for k in range(len(B)): + B[k] = Beta(k + 1) + for v in range(N - 1, -1, -1): + intchild = internalchildren(tree, v + ntip + 1) + intedges = edges[v] + if intchild[0] == 0: + R[v][0, 0] = 1 + elif intchild[0] == 1: + Rchild = R[intchild[1] - ntip - 1] + R[v][0][intedges] = 1 + R[v][1:ntip - 1, 0] = sum(t(Rchild[0:(ntip - 2), ] * B[0:(ntip - 1)]).T, axis=1) + R[v][1:(ntip - 1), 1:(ntip - 1)] = Rchild[1:(ntip - 1), 0:(ntip - 2)] + else: + Rchild1 = R[intchild[1] - ntip - 1] + Rchild2 = R[intchild[2] - ntip - 1] + + R[v][0, intedges] = 1 + R[v][2, 0] = sum(Rchild1[0,] * B[0:(ntip - 1)] * sum(Rchild2[0,] * B[0:(ntip - 1)])) + for s in range(3, (ntip - 1), 1): + R[v][s, 0] = sum(sum(Rchild1[0:(s - 1), ] * B[0:(ntip - 1)], axis=1) * sum( + np.flip(Rchild2, axis=0)[(len(Rchild2) - (s - 1)):len(Rchild2), ] * B[0:(ntip - 1)], axis=1)) + + sum1 = np.zeros((ntip - 2, ntip - 2)) + sum1[0, 0:(ntip - 2)] = sum(Rchild1[0,] * B[0:(ntip - 1)]) * Rchild2[0, 0:(ntip - 2)] + for s in range(2, ntip - 1): + temp = sum((sum(Rchild1[0:(s), ] * B[0:(ntip - 1)], axis=1) * np.flip(Rchild2, axis=0)[ + len(Rchild2) - (s):len(Rchild2), + 0:(ntip - 2)].T).T, axis=0) + sum1[s - 1, 0:(ntip - 2)] = temp + + sum2 = np.zeros((ntip - 2, ntip - 2)) + sum2[0, 0:(ntip - 2)] = sum(Rchild2[0,] * B[0:(ntip - 1)]) * Rchild1[0, 0:(ntip - 2)] + for s in range(2, (ntip - 1)): + temp = sum((sum(Rchild2[0:(s), ] * B[0:(ntip - 1)], axis=1) * np.flip(Rchild1, axis=0)[ + len(Rchild1) - (s):len(Rchild1), + 0:(ntip - 2)].T).T, axis=0) + sum2[s - 1][0:(ntip - 2)] = temp + + sum3_t0 = time.time() + sum3 = np.zeros((ntip - 2, ntip - 2)) + for s in range(0, (ntip - 2)): + for k in range(1, (ntip - 2)): + total3 = 0 + for s1 in range(-1, s + 1): + for k1 in range(-1, (k - 1)): + total3 += Rchild1[s1 + 1][k1 + 1] * Rchild2[s - s1][k - 2 - k1] + sum3[s, k] = total3 + sum3_tf = time.time() + sum3_dt = sum3_tf - sum3_t0 + sum3_dt_list.append(sum3_dt) + R[v][1:(ntip - 1), 1:(ntip - 1)] = sum1 + sum2 + sum3 + return R, sum3_dt_list + + +def RsT(R, n, s): + """ + + :param R: 3 dimensional matrix + :param n: integer + :param s: integer + :return: integer + """ + B = [] + for k in range(0, (n - 2)): + B.append(Beta(k + 1)) + # print(t(R[0][s,0:(n - 2 - s)])) + rst = sum(t(t(R[0][s, 0:(n - 2 - s)]) * B[0:(n - 2 - s)])) + return rst + + +def qmT(R, n, m): + """ + + :param R: 3 dimensional matrix + :param n: integer + :param m: integer + :return: integer + """ + qmt = 0 + for s in range(m, (n - 2)): + rst = RsT(R, n, s) + qmt = qmt + (factorial(s) / (factorial(m) * factorial(s - m))) * rst * pow((-1), (s - m)) + return qmt + + +def polynomial(tree, n): + """ + + :param tree: ETE tree + :param n: Number of tips on the ETE tree rooted + :return: 1 dimensional array + """ + Coef = [] + R = RF(tree, n) + for i in range(0, 2 * (n - 2), 2): + Coef.append(qmT(R, n, n - 3 - (i // 2))) + return Coef + + +def polynomial_sum3_performance(tree, n): + """ + + :param tree: ETE tree + :param n: Number of tips on the ETE tree rooted + :return: 1 dimensional array, sum3 performance in 1 dimensional array in seconds + """ + Coef = [] + R, sum3_dt_list = RF_sum3_performance(tree, n) + for i in range(0, 2 * (n - 2), 2): + Coef.append(qmT(R, n, n - 3 - (i // 2))) + return Coef, sum(sum3_dt_list) diff --git a/rfdist/build/lib/rfdist/RF_NTT.py b/rfdist/build/lib/rfdist/RF_NTT.py new file mode 100644 index 0000000..5d703b5 --- /dev/null +++ b/rfdist/build/lib/rfdist/RF_NTT.py @@ -0,0 +1,440 @@ +import time +import numpy as np +from numpy import transpose as t +from numpy import sum +from math import factorial +import math +#import primes +import ntt + +Len=[8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768,65536,131072] +Primes=[113,115792089237316195423570985008687907853269984665640564039457584007913129637873, + 115792089237316195423570985008687907853269984665640564039457584007913129636801, + 115792089237316195423570985008687907853269984665640564039457584007913129636801, + 115792089237316195423570985008687907853269984665640564039457584007913129635457, + 115792089237316195423570985008687907853269984665640564039457584007913129607681, + 115792089237316195423570985008687907853269984665640564039457584007913129607681, + 1606938044258990275541962092341162602522202993782792835297281, + 115792089237316195423570985008687907853269984665640564039457584007913129461761, + 115792089237316195423570985008687907853269984665640564039457584007913129172993, + 115792089237316195423570985008687907853269984665640564039457584007913129172993, + 115792089237316195423570985008687907853269984665640564039457584007913126920193, + 2037035976334486086268445688409378161051468393665936250636140449354381299763336706178908161, + 2037035976334486086268445688409378161051468393665936250636140449354381299763336706177892353, + 2135987035920910082395021706169552114602704522356652769947041607822219725780640550022962063736833] +G=[3,21,21,7,14,14,17,3,5,5,5,7,3,3,5] +W=[18,111631467676984398667335374000770145103933448276474029826861642673272833963082, + 61564226408844285440839285786349609218130544814944526994086477601177658749278, + 81935540147743287569595753552990576696362777472267730857667504939936024849618, + 91170141105073899547981930648308993603853306285937430424976477296731306054334, + 15708606027314919182172787703297741273850535825986009804848210667018260880261, + 33237221508024116229589036146826233600929365121793224404567006713723969953911, + 1434356156435145496244722803431900791032286202821828147418984, + 63540838781496461220816358160078141041184179050497342724817951168620586697763, + 15509384003381570599629987132205606235150174359826996727314588125855310328659, + 25133251670451185231426344419964958947927332720991278511886330943333482712954, + 48268024184090406204552669607243172351613601631957894966972042240077595762455, + 1981954346943524807557239944865006706282615432034635354549839215713305664395133015635456087, + 255453916006749145705554152189433435708896877084778108205342427278941085195694645573514698, + 1112225229508435524512344497914210498809987905889897633818900952286269329043189880863555612895717] +Inv_W=[44,95491701939787783842804801121993865397654678743734107460462393771310698665647, + 14352436843288370756829869005573506932522362276688548257485320352002835319209, + 62892232488306180704111955726624356283223010632387729863360038676442428890163, + 74617868801387775053539490766464698026550495075777446366514898818780219762498, + 13868795121166213445530776440016103808108592816062879198885163286366532864581, + 110859567440002879494961048339626890678895648700881402044340171696425633801612, + 1203745758939828332217036953616990186340140894893888367743845, + 90289433802154956706057573109440369585442036636363262555556580120598543329883, + 21861149690056766619999951991678670952283712223678074390943578332757181356925, + 49072410160240151341757778422903690290128844704330943680519698627182085130938, + 20156023685761413501013067524878709675678100528950462782526586000840609710652, + 876083208576290984072439742436788642271124879220068514346599382871681217068777930632586646, + 1474657800099070887974174855844346785758665342966052193836639675246968569796074002531960855, + 1525680669204487905316664361660865987506438064471172450598417789819020549260814728309162207838676] + +Inv_L=[99,108555083659983933209597798445644913612440610624038028786991485007418559035506, + 112173586448650064316584391727166410732855297644839296413224534507665844335651, + 113982837842983129870077688367927159293062641155239930226341059257789486986226, + 114887463540149662646824336688307533573166312910440247132899321632851308310180, + 115339776388732929035197660848497720713218148788040405586178452820382218945151, + 115565932813024562229384322928592814283244066726840484812818018414147674276416, + 1605368768825143605351003144985360685918177404921676826669061, + 115735550131243287125024319488664134460763505180940544232797692609471765629016, + 115763819684279741274297652248676021157016744923290554136127638308692447256691, + 115777954460797968348934318628681964505143364794465559087792611158302788214842, + 115785021849057081886252651818684936179206674730053061563625097583107956441255, + 2036973810929934862938176265628359808446455836647086582171460391357269654826210139506966666, + 2037004893632210474603310977018868984748962115156511416403800420355825477294773422841921621, + 2135970739633099406506331558604044839577416110609503442457036518698624890730242443329312596558002] + +def Beta(m): + """ + Double factorial of odd numbers: a(m) = (2*m-1)!! = 1*3*5*...*(2*m-1) + :param m: integer + :return: Integer + """ + if m <= 0: + ans = 1 + else: + ans = 1 + for i in range(1, m): + ans *= 2 * i + 1 + return ans + + +def internaledges(tree): + """ + The function for computing the number of internal edges for each internal node + :param tree: An ETE tree + :return: Array of the number of internal edges for each internal node. + """ + label_nodes(tree) + ntip = len(tree.get_leaves()) + intedges = [0] * (ntip - 1) + edges = get_edges(tree) + for i in range(2 * ntip - 1, ntip, -1): + children = [] + for idx, edge in enumerate(edges): + if str(i) in edge[0].name: + children.append(idx) + child1 = edges[children[0]][1] + child2 = edges[children[1]][1] + child1num = int(child1.name[1:]) + child2num = int(child2.name[1:]) + if child1num <= ntip and child2num <= ntip: + intedges[i - ntip - 1] = 0 + elif child1num <= ntip < child2num: + intedges[i - ntip - 1] = intedges[child2num - ntip - 1] + 1 + elif child2num <= ntip < child1num: + intedges[i - ntip - 1] = intedges[child1num - ntip - 1] + 1 + else: + intedges[i - ntip - 1] = intedges[child2num - ntip - 1] + intedges[child1num - ntip - 1] + 2 + return intedges + + +def get_edges(tree): + """ + Get edges of the tree in the form of (node_start,node_end) + :param tree: ETE tree + :return: List of edges of a tree of a list of tuples (node_start,node_end) + """ + edges = [] + for node in tree.traverse("postorder"): + if not node.is_leaf(): + edges.append((node, node.children[0])) + edges.append((node, node.children[1])) + return edges + + +def label_nodes(tree): + """ + Label the internal nodes in the tree. + :param tree: an ETE tree. + :return: None + """ + i = len(tree.get_leaves()) + 1 + t = 1 + for node in tree.traverse("preorder"): + if not node.is_leaf(): + node.name = "n" + str(i) + i += 1 + else: + node.name = "t" + str(t) + t += 1 + + +def internalchildren(tree, v): + """ + This function computes the number of internal children of each node. v is the node. + :param tree: An ETE tree + :param v: Integer label of node + :return: Number of internal children for the node, v + """ + label_nodes(tree) + ntip = len(tree.get_leaves()) + edges = get_edges(tree) + children = [] + for idx, edge in enumerate(edges): + if str(v) in edge[0].name: + children.append(idx) + child1 = edges[children[0]][1] + child2 = edges[children[1]][1] + child1num = int(child1.name[1:]) + child2num = int(child2.name[1:]) + if child1num > ntip and child2num > ntip: + result = [2, child1num, child2num] + elif child1num > ntip >= child2num: + result = [1, child1num] + elif child2num > ntip >= child1num: + result = [1, child2num] + else: + result = [0] + return result + + +def RF_NTT(tree, n): + """ + Calculates Robinson-Foulds metric for an ETE tree. + :param tree: An ETE tree. + :param n: Number of tips on the tree rooted. + :return: 3 dimensional matrix + """ + + ntip = n - 1 + N = 0 + hp=(n-4)*(n-2)*3 + L= 2**(math.ceil(math.log(hp,10)/math.log(2,10))) + + + for node in tree.traverse("preorder"): + if not node.is_leaf(): + N += 1 + R = np.zeros((ntip - 1, ntip - 1, N)) + edges = internaledges(tree) + B = [0] * (n - 1) + for k in range(len(B)): + B[k] = Beta(k + 1) + for v in range(N - 1, -1, -1): + intchild = internalchildren(tree, v + ntip + 1) + intedges = edges[v] + if intchild[0] == 0: + R[v][0, 0] = 1 + elif intchild[0] == 1: + Rchild = R[intchild[1] - ntip - 1] + R[v][0][intedges] = 1 + R[v][1:ntip - 1, 0] = sum(np.transpose(Rchild[0:(ntip - 2), ] * B[0:(ntip - 1)]).T, axis=1) + R[v][1:(ntip - 1), 1:(ntip - 1)] = Rchild[1:(ntip - 1), 0:(ntip - 2)] + else: + Rchild1 = R[intchild[1] - ntip - 1] + Rchild2 = R[intchild[2] - ntip - 1] + + R[v][0, intedges] = 1 + R[v][2, 0] = sum(Rchild1[0,] * B[0:(ntip - 1)] * sum(Rchild2[0,] * B[0:(ntip - 1)])) + for s in range(3, (ntip - 1), 1): + R[v][s, 0] = sum(sum(Rchild1[0:(s - 1), ] * B[0:(ntip - 1)], axis=1) * sum( + np.flip(Rchild2, axis=0)[(len(Rchild2) - (s - 1)):len(Rchild2), ] * B[0:(ntip - 1)], axis=1)) + + sum1 = np.zeros((ntip - 2, ntip - 2)) + sum1[0, 0:(ntip - 2)] = sum(Rchild1[0,] * B[0:(ntip - 1)]) * Rchild2[0, 0:(ntip - 2)] + for s in range(2, ntip - 1): + temp = sum((sum(Rchild1[0:(s), ] * B[0:(ntip - 1)], axis=1) * np.flip(Rchild2, axis=0)[ + len(Rchild2) - (s):len(Rchild2), + 0:(ntip - 2)].T).T, axis=0) + sum1[s - 1, 0:(ntip - 2)] = temp + + sum2 = np.zeros((ntip - 2, ntip - 2)) + sum2[0, 0:(ntip - 2)] = sum(Rchild2[0,] * B[0:(ntip - 1)]) * Rchild1[0, 0:(ntip - 2)] + for s in range(2, (ntip - 1)): + temp = sum((sum(Rchild2[0:(s), ] * B[0:(ntip - 1)], axis=1) * np.flip(Rchild1, axis=0)[ + len(Rchild1) - (s):len(Rchild1), + 0:(ntip - 2)].T).T, axis=0) + sum2[s - 1][0:(ntip - 2)] = temp + + R1 = Rchild1[0:(ntip - 1), 0:(ntip - 3)] + R1aug = np.zeros(L) + t = 0 + for i in range(R1.shape[1]): + R1aug[t:(t + R1.shape[0])] = R1[:, i] + t = t + (3 * R1.shape[0]) + + R2 = Rchild2[0:(ntip - 1), 0:(ntip - 3)] + R2aug = np.zeros(L) + t = 0 + for i in range(R2.shape[1]): + R2aug[t:(t + R2.shape[0])] = R2[:, i] + t = t + (3 * R2.shape[0]) + R1aug = R1aug.tolist() + R2aug = R2aug.tolist() + + ind = Len.index(L) + p = Primes[ind] + w = W[ind] + inv_w = Inv_W[ind] + inv_L = Inv_L[ind] + + R1aug = [int(x) for x in R1aug] + R2aug = [int(x) for x in R2aug] + + #start = time.time() + Ivec1 = ntt.transform_fast(R1aug, w, p) + Ivec2 = ntt.transform_fast(R2aug, w, p) + Ivec3 = [(x * y) % p for (x, y) in zip(Ivec1, Ivec2)] + conv = ntt.inverse_transform(Ivec3, inv_w, inv_L, p) + pad = math.ceil(len(R1aug) / (3 * R1.shape[0])) + conv_pad = [0] * (pad * 3 * R1.shape[0] - len(R1aug)) + sum3 = np.array(conv + conv_pad) + #sum3 = np.array(sum3) + nrow = 3 * R1.shape[0] + ncol = len(sum3) // nrow + sum3 = np.transpose(np.reshape(sum3, (ncol, nrow)))[0:R1.shape[0], 0:R1.shape[1]][1:R1.shape[0], ] + #sum3 = np.transpose(sum3) + #sum3 = sum3[0:R1.shape[0], 0:R1.shape[1]] + #sum3 = sum3[1:R1.shape[0], ] + z = np.zeros((R1.shape[0] - 1, 1)) + sum3 = np.concatenate((z, sum3), axis=1) + #sum3_tf = time.time() + #sum3_dt = sum3_tf - sum3_t0 + + R[v][1:(ntip - 1), 1:(ntip - 1)] = sum1 + sum2 + sum3 + return R + + +def RsT(R, n, s): + """ + + :param R: 3 dimensional matrix + :param n: integer + :param s: integer + :return: integer + """ + B = [] + for k in range(0, (n - 2)): + B.append(Beta(k + 1)) + # print(t(R[0][s,0:(n - 2 - s)])) + rst = sum(t(t(R[0][s, 0:(n - 2 - s)]) * B[0:(n - 2 - s)])) + return rst + + +def qmT(R, n, m): + """ + + :param R: 3 dimensional matrix + :param n: integer + :param m: integer + :return: integer + """ + qmt = 0 + for s in range(m, (n - 2)): + rst = RsT(R, n, s) + qmt = qmt + (factorial(s) / (factorial(m) * factorial(s - m))) * rst * pow((-1), (s - m)) + return qmt + +def RF_NTT_sum3_performance(tree, n): + """ + Calculates Robinson-Foulds metric for an ETE tree and records time of the sum3 section. + :param tree: An ETE tree. + :param n: Number of tips on the tree rooted. + :return: 3 dimensional matrix, sum3 performances in 1 dimensional array of seconds + """ + sum3_dt_list = [] + ntip = n - 1 + N = 0 + hp = (n - 4) * (n - 2) * 3 + L = 2 ** (math.ceil(math.log(hp, 10) / math.log(2, 10))) + + for node in tree.traverse("preorder"): + if not node.is_leaf(): + N += 1 + R = np.zeros((ntip - 1, ntip - 1, N)) + edges = internaledges(tree) + B = [0] * (n - 1) + for k in range(len(B)): + B[k] = Beta(k + 1) + for v in range(N - 1, -1, -1): + intchild = internalchildren(tree, v + ntip + 1) + intedges = edges[v] + if intchild[0] == 0: + R[v][0, 0] = 1 + elif intchild[0] == 1: + Rchild = R[intchild[1] - ntip - 1] + R[v][0][intedges] = 1 + R[v][1:ntip - 1, 0] = sum(np.transpose(Rchild[0:(ntip - 2), ] * B[0:(ntip - 1)]).T, axis=1) + R[v][1:(ntip - 1), 1:(ntip - 1)] = Rchild[1:(ntip - 1), 0:(ntip - 2)] + else: + Rchild1 = R[intchild[1] - ntip - 1] + Rchild2 = R[intchild[2] - ntip - 1] + + R[v][0, intedges] = 1 + R[v][2, 0] = sum(Rchild1[0,] * B[0:(ntip - 1)] * sum(Rchild2[0,] * B[0:(ntip - 1)])) + for s in range(3, (ntip - 1), 1): + R[v][s, 0] = sum(sum(Rchild1[0:(s - 1), ] * B[0:(ntip - 1)], axis=1) * sum( + np.flip(Rchild2, axis=0)[(len(Rchild2) - (s - 1)):len(Rchild2), ] * B[0:(ntip - 1)], axis=1)) + + sum1 = np.zeros((ntip - 2, ntip - 2)) + sum1[0, 0:(ntip - 2)] = sum(Rchild1[0,] * B[0:(ntip - 1)]) * Rchild2[0, 0:(ntip - 2)] + for s in range(2, ntip - 1): + temp = sum((sum(Rchild1[0:(s), ] * B[0:(ntip - 1)], axis=1) * np.flip(Rchild2, axis=0)[ + len(Rchild2) - (s):len(Rchild2), + 0:(ntip - 2)].T).T, axis=0) + sum1[s - 1, 0:(ntip - 2)] = temp + + sum2 = np.zeros((ntip - 2, ntip - 2)) + sum2[0, 0:(ntip - 2)] = sum(Rchild2[0,] * B[0:(ntip - 1)]) * Rchild1[0, 0:(ntip - 2)] + for s in range(2, (ntip - 1)): + temp = sum((sum(Rchild2[0:(s), ] * B[0:(ntip - 1)], axis=1) * np.flip(Rchild1, axis=0)[ + len(Rchild1) - (s):len(Rchild1), + 0:(ntip - 2)].T).T, axis=0) + sum2[s - 1][0:(ntip - 2)] = temp + + R1 = Rchild1[0:(ntip - 1), 0:(ntip - 3)] + R1aug = np.zeros(L) + t = 0 + for i in range(R1.shape[1]): + R1aug[t:(t + R1.shape[0])] = R1[:, i] + t = t + (3 * R1.shape[0]) + + R2 = Rchild2[0:(ntip - 1), 0:(ntip - 3)] + R2aug = np.zeros(L) + t = 0 + for i in range(R2.shape[1]): + R2aug[t:(t + R2.shape[0])] = R2[:, i] + t = t + (3 * R2.shape[0]) + R1aug = R1aug.tolist() + R2aug = R2aug.tolist() + + ind = Len.index(L) + p = Primes[ind] + w = W[ind] + inv_w = Inv_W[ind] + inv_L = Inv_L[ind] + + R1aug = [int(x) for x in R1aug] + R2aug = [int(x) for x in R2aug] + + sum3_t0 = time.time() + Ivec1 = ntt.transform_fast(R1aug, w, p) + Ivec2 = ntt.transform_fast(R2aug, w, p) + Ivec3 = [(x * y) % p for (x, y) in zip(Ivec1, Ivec2)] + conv = ntt.inverse_transform(Ivec3, inv_w, inv_L, p) + pad = math.ceil(len(R1aug) / (3 * R1.shape[0])) + conv_pad = [0] * (pad * 3 * R1.shape[0] - len(R1aug)) + sum3 = np.array(conv + conv_pad) + # sum3 = np.array(sum3) + nrow = 3 * R1.shape[0] + ncol = len(sum3) // nrow + sum3 = np.transpose(np.reshape(sum3, (ncol, nrow)))[0:R1.shape[0], 0:R1.shape[1]][1:R1.shape[0], ] + z = np.zeros((R1.shape[0] - 1, 1)) + sum3 = np.concatenate((z, sum3), axis=1) + sum3_tf = time.time() + sum3_dt = sum3_tf - sum3_t0 + sum3_dt_list.append(sum3_dt) + R[v][1:(ntip - 1), 1:(ntip - 1)] = sum1 + sum2 + sum3 + return R, sum3_dt_list + + +def polynomial(tree, n): + """ + + :param tree: ETE tree + :param n: Number of tips on the ETE tree rooted + :return: 1 dimensional array + """ + Coef = [] + R = RF_NTT(tree, n) + for i in range(0, 2 * (n - 2), 2): + Coef.append(qmT(R, n, n - 3 - (i // 2))) + return Coef + + +def polynomial_sum3_performance(tree, n): + """ + + :param tree: ETE tree + :param n: Number of tips on the ETE tree rooted + :return: 1 dimensional array, sum3 performance in 1 dimensional array in seconds + """ + Coef = [] + R, sum3_dt_list = RF_NTT_sum3_performance(tree, n) + for i in range(0, 2 * (n - 2), 2): + Coef.append(qmT(R, n, n - 3 - (i // 2))) + return Coef, sum(sum3_dt_list) \ No newline at end of file diff --git a/rfdist/build/lib/rfdist/__init__.py b/rfdist/build/lib/rfdist/__init__.py new file mode 100644 index 0000000..9714d64 --- /dev/null +++ b/rfdist/build/lib/rfdist/__init__.py @@ -0,0 +1,8 @@ +from .RF import internalchildren, internaledges, label_nodes, get_edges, RF, RsT, qmT, polynomial, Beta, \ + polynomial_sum3_performance, RF_sum3_performance +from .ntt import transform_fast, convolve_ntt, inverse_transform +from .RF_NTT import Len, Primes, G, W, Inv_W, Inv_L, RF_NTT, polynomial +name = "rfdist" +__all__ = ['RF', 'internalchildren', 'internaledges', 'label_nodes', 'get_edges', 'RsT', 'qmT', 'polynomial', 'Beta', + 'polynomial_sum3_performance', 'RF_sum3_performance', 'transform_fast', 'convolve_ntt', 'inverse_transform', + 'Len', 'Primes', 'G', 'W', 'Inv_W', 'Inv_L', 'RF_NTT', 'polynomial'] diff --git a/rfdist/build/lib/rfdist/ntt.py b/rfdist/build/lib/rfdist/ntt.py new file mode 100644 index 0000000..4260c7c --- /dev/null +++ b/rfdist/build/lib/rfdist/ntt.py @@ -0,0 +1,60 @@ +# Computes the forward number-theoretic transform of the given vector, +# with respect to the given primitive nth root of unity under the given modulus. +# The length of the vector must be a power of 2. + +def transform_fast(vector, root, mod): + transform_vec = vector + n = len(transform_vec) + levels = n.bit_length() - 1 + if 1 << levels != n: + raise ValueError("Length is not a power of 2") + + powtable = [] + temp = 1 + for i in range(n // 2): + powtable.append(temp) + temp = temp * root % mod + + def reverse(x, bits): + y = 0 + for i in range(bits): + y = (y << 1) | (x & 1) + x >>= 1 + return y + + for i in range(n): + j = reverse(i, levels) + if j > i: + transform_vec[i], transform_vec[j] = transform_vec[j], transform_vec[i] + size = 2 + while size <= n: + halfsize = size // 2 + tablestep = n // size + for i in range(0, n, size): + k = 0 + for j in range(i, i + halfsize): + l = j + halfsize + left = transform_vec[j] + right = transform_vec[l] * powtable[k] + transform_vec[j] = (left + right) % mod + transform_vec[l] = (left - right) % mod + k += tablestep + size *= 2 + return (transform_vec) + + +# Compute the inverse of ntt + +def inverse_transform(vector, inv_root, inv_L, mod): + outvec = transform_fast(vector, inv_root, mod) + return [(val * inv_L % mod) for val in outvec] + + +# Compute the convolution using ntt + +def convolve_ntt(vec1, vec2, root, inv_root, inv_L, mod): + temp1 = transform_fast(vec1, root, mod) + temp2 = transform_fast(vec2, root, mod) + temp3 = [(x * y % mod) for (x, y) in zip(temp1, temp2)] + conv = inverse_transform(temp3, inv_root, inv_L, mod) + return conv \ No newline at end of file diff --git a/rfdist/dist/rfdist-0.3.tar.gz b/rfdist/dist/rfdist-0.3.tar.gz deleted file mode 100644 index 452cefb..0000000 Binary files a/rfdist/dist/rfdist-0.3.tar.gz and /dev/null differ diff --git a/rfdist/dist/rfdist-0.9-py2-none-any.whl b/rfdist/dist/rfdist-0.9-py2-none-any.whl new file mode 100644 index 0000000..a52f2ae Binary files /dev/null and b/rfdist/dist/rfdist-0.9-py2-none-any.whl differ diff --git a/rfdist/dist/rfdist-0.9-py3-none-any.whl b/rfdist/dist/rfdist-0.9-py3-none-any.whl new file mode 100644 index 0000000..0ff5576 Binary files /dev/null and b/rfdist/dist/rfdist-0.9-py3-none-any.whl differ diff --git a/rfdist/dist/rfdist-0.9.tar.gz b/rfdist/dist/rfdist-0.9.tar.gz new file mode 100644 index 0000000..962d564 Binary files /dev/null and b/rfdist/dist/rfdist-0.9.tar.gz differ diff --git a/rfdist/rfdist.egg-info/PKG-INFO b/rfdist/rfdist.egg-info/PKG-INFO index b4441a3..c37a1c5 100644 --- a/rfdist/rfdist.egg-info/PKG-INFO +++ b/rfdist/rfdist.egg-info/PKG-INFO @@ -1,10 +1,13 @@ -Metadata-Version: 1.0 +Metadata-Version: 1.1 Name: rfdist -Version: 0.3 -Summary: UNKNOWN +Version: 0.9 +Summary: Python package implementing an improved version of the RF distribution computation by Bryant et al. Home-page: UNKNOWN -Author: UNKNOWN +Author: Frank Hui Author-email: UNKNOWN License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN +Classifier: Programming Language :: Python :: 3 +Classifier: License :: OSI Approved :: MIT License +Classifier: Operating System :: OS Independent diff --git a/rfdist/rfdist.egg-info/SOURCES.txt b/rfdist/rfdist.egg-info/SOURCES.txt index 6586dcd..1707ba8 100644 --- a/rfdist/rfdist.egg-info/SOURCES.txt +++ b/rfdist/rfdist.egg-info/SOURCES.txt @@ -1,5 +1,9 @@ README.md setup.py +rfdist/RF.py +rfdist/RF_NTT.py +rfdist/__init__.py +rfdist/ntt.py rfdist.egg-info/PKG-INFO rfdist.egg-info/SOURCES.txt rfdist.egg-info/dependency_links.txt diff --git a/rfdist/rfdist.egg-info/requires.txt b/rfdist/rfdist.egg-info/requires.txt index b279fcc..a333745 100644 --- a/rfdist/rfdist.egg-info/requires.txt +++ b/rfdist/rfdist.egg-info/requires.txt @@ -1,3 +1,2 @@ ete3 -logging numpy diff --git a/rfdist/rfdist.egg-info/top_level.txt b/rfdist/rfdist.egg-info/top_level.txt index 8b13789..77fa1ad 100644 --- a/rfdist/rfdist.egg-info/top_level.txt +++ b/rfdist/rfdist.egg-info/top_level.txt @@ -1 +1 @@ - +rfdist diff --git a/rfdist/rfdist/RF.py b/rfdist/rfdist/RF.py old mode 100644 new mode 100755 diff --git a/rfdist/rfdist/RF_NTT.py b/rfdist/rfdist/RF_NTT.py old mode 100644 new mode 100755 diff --git a/rfdist/rfdist/__init__.py b/rfdist/rfdist/__init__.py old mode 100644 new mode 100755 index de64639..9714d64 --- a/rfdist/rfdist/__init__.py +++ b/rfdist/rfdist/__init__.py @@ -2,7 +2,7 @@ polynomial_sum3_performance, RF_sum3_performance from .ntt import transform_fast, convolve_ntt, inverse_transform from .RF_NTT import Len, Primes, G, W, Inv_W, Inv_L, RF_NTT, polynomial - +name = "rfdist" __all__ = ['RF', 'internalchildren', 'internaledges', 'label_nodes', 'get_edges', 'RsT', 'qmT', 'polynomial', 'Beta', 'polynomial_sum3_performance', 'RF_sum3_performance', 'transform_fast', 'convolve_ntt', 'inverse_transform', 'Len', 'Primes', 'G', 'W', 'Inv_W', 'Inv_L', 'RF_NTT', 'polynomial'] diff --git a/rfdist/rfdist/__pycache__/RF.cpython-37.pyc b/rfdist/rfdist/__pycache__/RF.cpython-37.pyc deleted file mode 100644 index eabc66a..0000000 Binary files a/rfdist/rfdist/__pycache__/RF.cpython-37.pyc and /dev/null differ diff --git a/rfdist/rfdist/__pycache__/RF_NTT.cpython-37.pyc b/rfdist/rfdist/__pycache__/RF_NTT.cpython-37.pyc deleted file mode 100644 index 369fe2d..0000000 Binary files a/rfdist/rfdist/__pycache__/RF_NTT.cpython-37.pyc and /dev/null differ diff --git a/rfdist/rfdist/__pycache__/__init__.cpython-37.pyc b/rfdist/rfdist/__pycache__/__init__.cpython-37.pyc deleted file mode 100644 index 9d5b972..0000000 Binary files a/rfdist/rfdist/__pycache__/__init__.cpython-37.pyc and /dev/null differ diff --git a/rfdist/rfdist/__pycache__/ntt.cpython-37.pyc b/rfdist/rfdist/__pycache__/ntt.cpython-37.pyc deleted file mode 100644 index f097aae..0000000 Binary files a/rfdist/rfdist/__pycache__/ntt.cpython-37.pyc and /dev/null differ diff --git a/rfdist/rfdist/ntt.py b/rfdist/rfdist/ntt.py old mode 100644 new mode 100755 diff --git a/rfdist/setup.py b/rfdist/setup.py old mode 100644 new mode 100755 index 2d26fe9..d67ac49 --- a/rfdist/setup.py +++ b/rfdist/setup.py @@ -1,6 +1,15 @@ from setuptools import setup +import setuptools setup(name='rfdist', - version='0.3', - install_requires=['numpy','ete3','logging'] + version='0.9', + author='Frank Hui', + description="Python package implementing an improved version of the RF distribution computation by Bryant et al.", + packages=setuptools.find_packages(), + install_requires=['numpy','ete3'], + classifiers=[ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + ] ) diff --git a/rfdistr/ntt_fromR.py b/rfdistr/.ntt_fromR.py similarity index 97% rename from rfdistr/ntt_fromR.py rename to rfdistr/.ntt_fromR.py index 714a6ee..114f10e 100644 --- a/rfdistr/ntt_fromR.py +++ b/rfdistr/.ntt_fromR.py @@ -1,6 +1,7 @@ -import ntt +from rfdist import ntt import json import time +import os.path Len=[8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768,65536,131072] Primes=[113,115792089237316195423570985008687907853269984665640564039457584007913129637873, @@ -62,7 +63,7 @@ 2037004893632210474603310977018868984748962115156511416403800420355825477294773422841921621, 2135970739633099406506331558604044839577416110609503442457036518698624890730242443329312596558002] -filepath = 'R/testNTT.txt' +filepath = os.path.expanduser("~") + '/testNTT.txt' vec = [] with open(filepath) as fp: line = fp.readline() @@ -87,5 +88,5 @@ conv.append(T) -with open('R/outNTT.txt', 'w') as outNTT: +with open(os.path.expanduser("~") + '/outNTT.txt', 'w') as outNTT: outNTT.writelines("%s\n" % i for i in conv) diff --git a/rfdistr/DESCRIPTION b/rfdistr/DESCRIPTION index 61b584e..52299eb 100644 --- a/rfdistr/DESCRIPTION +++ b/rfdistr/DESCRIPTION @@ -9,3 +9,7 @@ Description: More about what it does (maybe more than one line) License: What license is it under? Encoding: UTF-8 LazyData: true +Imports: + ape, + phangorn, + readtext diff --git a/rfdistr/R/RF_NTTfromPy.R b/rfdistr/R/RF_NTTfromPy.R index d3e4408..0b76bdf 100644 --- a/rfdistr/R/RF_NTTfromPy.R +++ b/rfdistr/R/RF_NTTfromPy.R @@ -5,6 +5,7 @@ library("phangorn") library("readtext") options("digits"=22) options("scipen"=100) + #Beta function Beta=function(m){ if(m<=0){ans=1} @@ -46,7 +47,7 @@ internalchildren <- function(tree,v,ntip){ return(result) } -ntt_RF_Convolve=function(tree,n){ +ntt_RF_Convolve=function(tree,n,verbose=0){ tt=0 t=(n-4)*(n-2)*3 L= 2^ceiling(log(t)/log(2)) @@ -110,20 +111,44 @@ ntt_RF_Convolve=function(tree,n){ } #write(L,"testNTT.txt",ncolumns=L,append = TRUE) - write(R1aug,"testNTT.txt",ncolumns=L,append = TRUE) - write(R2aug,"testNTT.txt",ncolumns=L,append = TRUE) + write(R1aug,"~/testNTT.txt",ncolumns=L,append = TRUE) + write(R2aug,"~/testNTT.txt",ncolumns=L,append = TRUE) + + #If python script is changed, increment this number. + python_ntt_version <- 4 #read the output of Python + current_filename <- paste(".ntt_fromR",paste0(python_ntt_version,sep = ""),".py", sep = "") + if(length(which(list.files("~",all.files=TRUE) == current_filename)) == 0){ + if(verbose == 1){print("ntt_fromR is old, deleted, or was not installed. Installing .ntt_fromR.")} + download.file('https://raw.githubusercontent.com/FrankWhoee/rfdistr/master/.ntt_fromR.py', destfile = paste("~/",current_filename, sep = ""), method="curl") + previous_filename <- paste(".ntt_fromR",paste0(python_ntt_version - 1),".py", sep = "") + if(length(which(list.files("~",all.files=TRUE) == previous_filename)) != 0){ + if(verbose == 1){print("Older version of ntt_fromR found, automatically removing")} + file.remove(paste("~/",previous_filename, sep = "")) + } + } + if(verbose == 1){print("ntt_fromR file found. Checking for dependencies...")} + + if(length(which(grepl("rfdist",system("pip list", intern = TRUE)) == TRUE)) == 0){ + if(verbose == 1){print("pip package rfdist was deleted or not installed. Installing rfdist.")} + system('pip install rfdist') + } + + if(verbose == 1){print("Dependency installed.") + print("Attempting to run:") + print(current_filename)} + system(paste('python ~/',current_filename, sep = "")) - system('python ntt_fromR.py') - U=as.matrix(read.csv("outNTT.txt",header = FALSE, quote="")) + U=as.matrix(read.csv("~/outNTT.txt",header = FALSE, quote="")) Matc=ceiling(length(R1aug)/(3*nrow(R1))) sum3=matrix(c(U,numeric(Matc*3*nrow(R1)-length(R1aug))),nrow=3*nrow(R1))[1:nrow(R1),1:ncol(R1)] sum3=cbind(array(0, dim=c(nrow(R1)-1,1)),sum3[2:nrow(R1),]) R[[v]][2:(ntip-1),2:(ntip-1)]=sum1+sum2+sum3 - file.remove("testNTT.txt") + file.remove("~/testNTT.txt") + file.remove("~/outNTT.txt") } } @@ -155,9 +180,9 @@ qmT=function(R,n,m){ } #this function computes the RF distribution -ntt_polynomial=function(tree,n){ +ntt_polynomial=function(tree,n,verbose=0){ Coef=numeric() - R=ntt_RF_Convolve(tree,n) + R=ntt_RF_Convolve(tree,n,verbose) for (i in seq(0,2*(n-3),2)) { Coef=c(Coef,qmT(R,n,n-3-(i/2))) } diff --git a/run_RF.py b/run_RF.py index fab3d28..f7aa110 100644 --- a/run_RF.py +++ b/run_RF.py @@ -1,9 +1,9 @@ from ete3 import Tree -import RF +from rfdist.rfdist.RF import polynomial def test(newick): tree = Tree(newick) - print(RF.polynomial(tree, tree.get_leaves().__len__() + 1)) + print(polynomial(tree, tree.get_leaves().__len__() + 1)) # 8 tip # newick ="(((t2:0.8727817549,(t1:0.4722830437,t3:0.5129283739):0.02436175128):0.7209995012,(t4:0.06786046806,t6:0.7481567271):0.07053013402):0.5619613971,((t7:0.1686784215,t8:0.2114313007):0.1445601732,t5:0.8294858425):0.6152706817);"