-
Notifications
You must be signed in to change notification settings - Fork 0
/
symbolic.py
238 lines (184 loc) · 7.01 KB
/
symbolic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
import os
import numpy as np
import sys
# This file contains the four methods used to convert numerical data into symbolic data.
# TODO: New parameter for sax: vary the distance between two horizontal separations in sax/set-based.
# Human-readable name for each column index (0-7).
# NOTE(review): presumably these label the eight value columns of the data
# files; the mapping is not referenced by the code visible here - confirm.
index_to_attribute = dict(enumerate(
    ('x', 'y', 'z', "palm roll", "thumb", "index", "middle", "ring")
))
def home_made(input_file, output_file, gap):
    """
    Turn every numerical column of a data file into a sequence of trend labels.

    For each sample that has both a previous and a next neighbour, the variation
    is computed as (next value - previous value):
      - 'C' (constant) when the absolute variation is smaller than gap
      - 'I' (incrementation) when the variation is positive
      - 'D' (decrementation) otherwise
    The first and the last sample of a column have only one neighbour and get no
    label, so each output column has two entries fewer than the input column
    (and is empty when the input has fewer than three samples).

    input_file: file from where the data is read
    output_file: file where the data is written
    gap: threshold below which a variation is considered constant
    """
    dict_values = from_file_to_dict(input_file)
    new_dict_values = {}
    for key, values in dict_values.items():
        labels = []
        # Start at 1 and stop before the last index: index 0 has no previous
        # sample (values[-1] would silently wrap around to the end of the list)
        # and the last index has no next sample.
        for idx in range(1, len(values) - 1):
            diff = values[idx + 1] - values[idx - 1]
            if abs(diff) < gap:
                labels.append('C')
            elif diff > 0:
                labels.append('I')
            else:
                labels.append('D')
        new_dict_values[key] = labels
    from_dict_to_file(output_file, new_dict_values)
def sax(input_file, output_file, height, diffs=[]):
    """
    Simplified SAX discretisation: every value is replaced by the letter of the
    horizontal band it falls in ('A' for the lowest band).
    See http://cs.gmu.edu/~jessica/SAX_DAMI_preprint.pdf for details

    Columns 0 to 3 are shifted by +1 and use a total span of 2; the other columns
    are not shifted and use a total span of 0.751.
    Without diffs, all bands of a column have the same height and the number of
    separations comes from a per-column table.
    With diffs, each entry is the fraction of the span covered by one band, from
    the bottom up; a value that is not below any of the resulting thresholds is
    left as it is (no letter is assigned).

    input_file: file from where the data is read
    output_file: file where the data is written
    height: number of horizontal separation (currently unused, see the
            per-column table in the body)
    diffs: optional list of band fractions; empty means equally distant bands
    """
    columns = from_file_to_dict(input_file)
    alphabet = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K']
    # Number of separations per column index; replaces the `height` argument.
    separations = [9, 9, 5, 8, 8, 6, 6, 6]
    custom_bands = len(diffs) > 0
    for key, values in columns.items():
        # NOTE(review): 0.751 is presumably just above 0.75 so that a value of
        # exactly 0.75 still maps to a valid band - confirm.
        base, add = (2, 1) if key <= 3 else (0.751, 0)
        if custom_bands:
            # Cumulative upper threshold of every band.
            gap = []
            running = 0
            for fraction in diffs:
                running += fraction
                gap.append(base * running)
        else:
            gap = base / (separations[key] + 1)
        for pos, value in enumerate(values):
            shifted = value + add
            if not custom_bands:
                values[pos] = alphabet[int(shifted / gap)]
                continue
            # NOTE(review): when no threshold is above the value, the raw
            # number is kept in the output - confirm this is intended.
            for band, upper in enumerate(gap):
                if shifted < upper:
                    values[pos] = alphabet[band]
                    break
    from_dict_to_file(output_file, columns)
def sax_derivate(input_file, output_file, height):
    """
    Apply a SAX-like discretisation to the derivative of every column.

    For each sample that has both a previous and a next neighbour, the derivative
    is (next value - previous value). The interval [min, max] of the derivatives
    of a column is cut into `height` bands of equal size, and each derivative is
    replaced by the letter of its band ('A' for the lowest one). The largest
    derivative lies exactly on the upper boundary and therefore gets the letter
    that follows the last band. When a column shows no variation at all, every
    entry is labelled 'A'.

    The first and the last sample of a column get no entry, so each output column
    has two entries fewer than the input column; a column with fewer than three
    samples produces an empty output column.

    input_file: file from where the data is read
    output_file: file where the data is written
    height: number of horizontal separation; must not be 0 (ZeroDivisionError)
            and must stay small enough for the 11 available letters
    """
    dict_values = from_file_to_dict(input_file)
    letters = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K']
    new_dict_values = {key: [] for key in dict_values}
    for key, values in dict_values.items():
        # Interior samples only: index 0 has no previous sample (values[-1]
        # would wrap around to the end) and the last index has no next one.
        der = [values[idx + 1] - values[idx - 1]
               for idx in range(1, len(values) - 1)]
        if not der:
            # Nothing to discretise; max()/min() would raise on an empty list.
            continue
        lowest = min(der)
        gap = (max(der) - lowest) / height
        for v in der:
            if gap == 0:
                # No variation: the letter does not matter, so use 'A'.
                new_dict_values[key].append(letters[0])
            else:
                # Offset by the minimum so the lowest derivative maps to band
                # 0, whatever the sign of the minimum is.
                new_dict_values[key].append(letters[int((v - lowest) / gap)])
    from_dict_to_file(output_file, new_dict_values)
def set_based(input_file, output_file, height, width, diffs=[]):
    """
    Simplified Set-based discretisation: every value is replaced by the letter of
    the grid cell (vertical slice x horizontal band) it falls in.
    See http://dl.acm.org/citation.cfm?id=2882963 for details

    The vertical slice is derived from the position of the sample in its column,
    using slices of 30/width samples. The letter index is
    slice * width + band.
    Without diffs, all bands have the same height (2/height).
    With diffs, each entry is the fraction of the span covered by one band, from
    the bottom up; a value that is not below any of the resulting thresholds is
    left as it is (no letter is assigned).

    input_file: file from where the data is read
    output_file: file where the data is written
    height: number of horizontal separation
    width: number of vertical separation
    diffs: optional list of band fractions; empty means equally high bands
    """
    columns = from_file_to_dict(input_file)
    # NOTE(review): 30 is presumably the number of samples per sequence - confirm.
    width_gap = 30 / width
    alphabet = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
                'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y']
    custom_bands = len(diffs) > 0
    for key, values in columns.items():
        # Span of the column: 2 for columns 0 to 3, 0.75 for the others.
        base = 2 if key <= 3 else 0.75
        # NOTE(review): the equal-height band size is 2/height for every column,
        # whatever its span - confirm this is intended.
        height_gap = 2 / height
        if custom_bands:
            # Cumulative upper threshold of every band.
            height_gap = []
            running = 0
            for fraction in diffs:
                running += fraction
                height_gap.append(base * running)
        for pos, value in enumerate(values):
            first_cell = int(pos / width_gap) * width
            # NOTE(review): the value is shifted by the span itself (2 or 0.75)
            # before being compared to the bands - confirm this is intended.
            shifted = value + base
            if not custom_bands:
                values[pos] = alphabet[first_cell + int(shifted / height_gap)]
                continue
            for band, upper in enumerate(height_gap):
                if shifted < upper:
                    values[pos] = alphabet[first_cell + band]
                    break
    from_dict_to_file(output_file, columns)
def from_file_to_dict(input_file):
    """
    Read a data file and return its content as a dictionary of columns, to ease
    the data management.

    Every non-blank line is split on whitespace; the first field (the line's
    number) is dropped and each remaining field is converted to float and
    appended to the list of its column index.

    input_file: name of the file to read the data from
    returns: dictionary mapping each column index (0 to 7) to the list of its
             values, in file order
    raises: ValueError when a field is not a number, KeyError when a line holds
            more than 8 values
    """
    dict_values = {k: [] for k in range(8)}
    # The with-block guarantees the file is closed, even when parsing fails.
    with open(input_file) as data_file:
        for line in data_file:
            # split() without argument ignores the trailing space/newline and
            # yields nothing for a blank line, so no manual cleanup is needed.
            fields = line.split()
            for idx, field in enumerate(fields[1:]):  # fields[0] is the line's number
                dict_values[idx].append(float(field))
    return dict_values
def from_dict_to_file(output_file, dict_values):
    """
    Write a dictionary of columns into a file, respecting the way the data files
    are constructed.

    One line is written per index of column 0: the line's number, then the value
    of every column at that index, each followed by a single space, then a
    newline (so every line ends with a space before the newline).

    output_file: name of the file to write the data into
    dict_values: values to write in the file; it must have a key 0, whose length
                 sets the number of lines, and every other column must be at
                 least that long (IndexError otherwise)
    """
    # The with-block guarantees the data is flushed and the file closed.
    with open(output_file, 'w') as data_file:
        for i in range(len(dict_values[0])):
            cells = "".join(str(column[i]) + " " for column in dict_values.values())
            data_file.write(str(i) + " " + cells + "\n")
if __name__ == "__main__":
    # The script is used as follow:
    #   python symbolic.py [sax] [sax_der] [home] [set]
    # depending of whichever kind of method you want to use. Every ".dat" file
    # found in the "data" directory is converted with each requested method.
    requested = sys.argv
    if len(requested) > 1:
        for file in os.listdir("data"):
            if not file.endswith(".dat"):
                continue
            source = "data/" + file
            stem = file.replace(".dat", "")
            # SAX variants: one output file per number of separations (1 to 9).
            for level in range(1, 10):
                suffix = str(level) + ".dat"
                if "sax" in requested:
                    sax(source, "data/sax/" + stem + "_sax_" + suffix, level)
                if "sax_der" in requested:
                    sax_derivate(source, "data/sax_derivate/" + stem + "_sax_derivate_" + suffix, level)
            # Home-made labelling: one output file per gap value.
            if "home" in requested:
                for gap in np.arange(0.05, 0.25, 0.05):
                    home_made(source, "data/hm/" + stem + "_hm_" + str(gap) + ".dat", gap)
            # Set-based: one output file per (height, width) pair, numbered from 1.
            if "set" in requested:
                grid_sizes = [(rows, cols) for rows in range(2, 5) for cols in range(2, 5)]
                for cpt, (rows, cols) in enumerate(grid_sizes, start=1):
                    set_based(source, "data/set_based/" + stem + "_setbased_" + str(cpt) + ".dat", rows, cols)
    else:
        print("Too few arugments, enter at least one of those : python symbolic.py [sax] [sax_der] [home] [set]")
#sax("data/come_1.dat", "data/sax_aaaaaaaaatest_.dat", 1, [0.6,0.4])
#set_based("data/come_1.dat", "data/set_aaaaaaaaatest_.dat", 2, 2, [0.6,0.4])