-
Notifications
You must be signed in to change notification settings - Fork 1
/
RegLinkerIO.py
141 lines (92 loc) · 3.71 KB
/
RegLinkerIO.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import networkx as nx
def filter_comments(handle):
    '''
    Lazily drop comment lines from an iterable of lines.

    A line counts as a comment only when its very first character is "#";
    lines with leading whitespace before the "#" are kept.

    :param handle: iterable of strings (e.g. an open text file)
    :returns: generator over the non-comment lines, in their original order
    '''
    return (
        row
        for row in handle
        if not row.startswith("#")
    )
def tokenize(line):
    '''
    Split a tab-separated line into its fields.

    Surrounding whitespace (including the trailing newline) is stripped
    from every field; empty fields are preserved as empty strings.

    :param line: a single line of text
    :returns: list of stripped field strings
    '''
    raw_fields = line.split("\t")
    return [field.strip() for field in raw_fields]
def add_edge(G, toks, label_col=None, weight_col=None, label="l", weight="w"):
    '''
    Accessory method for adding an edge to G from one tokenized row.

    The first two tokens are the tail and head nodes of the edge. The
    optional label and weight columns are attached as edge attributes.

    Note: if the edge has already been added, the weight and the label
    of the edge will be replaced (this relies on G.add_edge updating the
    attributes of an existing edge).

    :param G: graph object exposing add_edge(tail, head, **attrs)
    :param toks: sequence of column values for a single edge
    :param label_col: 0-indexed column holding the edge label (if any)
    :param weight_col: 0-indexed column holding the edge weight (if any);
        the value is converted with float()
    :param label: attribute name under which the label is stored
    :param weight: attribute name under which the weight is stored
    :raises IndexError: if toks has fewer than two entries or a requested
        column does not exist
    :raises ValueError: if the weight column cannot be parsed as a float
    '''
    tail = toks[0]
    head = toks[1]

    attrs = {}

    # Compare against None by identity: column 0 is a legitimate (falsy)
    # index, so only an explicit None means "no such column".
    if label_col is not None:
        attrs[label] = toks[label_col]

    if weight_col is not None:
        attrs[weight] = float(toks[weight_col])

    G.add_edge(tail, head, **attrs)
def read_graph(handle, label_col=None, weight_col=None, label="l", weight="w"):
    '''
    Build a directed graph from a tab-separated edge list.

    Every non-comment line describes one edge: the first two columns are
    the tail and head nodes. Lines starting with "#" are skipped.

    :param handle: graph file handle (any iterable of lines)
    :param label_col: column to be read as the label of each edge (if any)
        Note: 0-indexed
    :param weight_col: column to be read as the weight of each edge (if any)
        Note: 0-indexed
    :param label: name to be used for the edge label field
    :param weight: name to be used for the edge weight field

    :returns: NetworkX DiGraph
    '''
    graph = nx.DiGraph()

    for edge_line in filter_comments(handle):
        columns = tokenize(edge_line)
        add_edge(graph, columns, label_col, weight_col, label, weight)

    return graph
def read_node_types(handle, type_col=1,
                    source_kws=["source"], target_kws=["target"]):
    '''
    Read sources and targets from a tab-separated file, one node per line.

    The first column of each non-comment line is the node name. A node
    that appears on one line as a source and on another as a target is
    returned in both lists. Within a single line the source keywords are
    checked first, so a type matching both keyword lists is recorded as a
    source only. Lines whose type matches neither list are ignored.

    :param handle: node type file handle
    :param type_col: 0-indexed column to be interpreted as the node type
    :param source_kws: list of type keywords indicating sources
    :param target_kws: list of type keywords indicating targets

    :returns: tuple (sources, targets) of node-name lists, in file order
    '''
    sources, targets = [], []

    for node_line in filter_comments(handle):
        columns = tokenize(node_line)
        name = columns[0]
        kind = columns[type_col]

        if kind in source_kws:
            sources.append(name)
            continue

        if kind in target_kws:
            targets.append(name)

    return sources, targets
def write_edge_file(handle, results):
    '''
    Write one ranked edge per line as: tail, head, rank, weight.

    Columns are tab-separated. Rows are separated by newlines, with no
    newline after the last row.

    :param handle: writable text handle
    :param results: iterable of 7-tuples; the first item is the edge
        (indexable as edge[0], edge[1]) and the last two items are the
        weight and the rank
    '''
    for position, record in enumerate(results):
        edge, _, _, _, _, weight, rank = record

        # The newline goes *before* every row but the first, so the
        # output never ends with a trailing newline.
        if position > 0:
            handle.write("\n")

        for column in (edge[0], edge[1], rank):
            handle.write(str(column) + "\t")
        handle.write(str(weight))
def write_projected_edge_file(handle, results):
    '''
    Write one ranked edge per line, keeping only the first component of
    each endpoint: tail[0], head[0], rank, weight.

    Columns are tab-separated. Rows are separated by newlines, with no
    newline after the last row.

    :param handle: writable text handle
    :param results: iterable of 7-tuples; the first item is an edge whose
        two endpoints are themselves indexable, and the last two items
        are the weight and the rank
    '''
    for position, record in enumerate(results):
        edge, _, _, _, _, weight, rank = record

        # The newline goes *before* every row but the first, so the
        # output never ends with a trailing newline.
        if position > 0:
            handle.write("\n")

        tail, head = edge[0], edge[1]
        for column in (tail[0], head[0], rank):
            handle.write(str(column) + "\t")
        handle.write(str(weight))
def write_paths_file(handle, results):
    '''
    Write one ranked path per line with six tab-separated columns:
    rank, weight, path, g_path, h_path, labeled path.

    The nodes of path, g_path and h_path are joined with "|". The entries
    of the labeled path are joined with ", ". Rows are separated by
    newlines, with no newline after the last row.

    :param handle: writable text handle
    :param results: iterable of 7-tuples laid out as
        (edge, path, g_path, h_path, labeled_path, weight, rank);
        the edge item is ignored here
    '''
    for position, record in enumerate(results):
        _, path, g_path, h_path, labeled_path, weight, rank = record

        # The newline goes *before* every row but the first, so the
        # output never ends with a trailing newline.
        if position > 0:
            handle.write("\n")

        joined_path = "|".join(map(str, path))
        joined_g_path = "|".join(map(str, g_path))
        joined_h_path = "|".join(map(str, h_path))
        joined_labels = ", ".join(map(str, labeled_path))

        handle.write(str(rank) + "\t")
        handle.write(str(weight) + "\t")
        for column in (joined_path, joined_g_path, joined_h_path):
            handle.write(column + "\t")
        handle.write(joined_labels)