-
Notifications
You must be signed in to change notification settings - Fork 0
/
script.py
100 lines (84 loc) · 2.51 KB
/
script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import json
from pearhash import PearsonHasher
# Shared Pearson hasher used by main(). main() converts each digest to an int
# and uses it as an index into a 256-entry histogram, so the argument 1 is
# presumably the digest length in bytes -- TODO confirm against pearhash docs.
hasher = PearsonHasher(1)
import numpy
def generate_triplets(inp):
    """Return the six triplets formed from a 5-element window.

    Every triplet starts with the first element of ``inp`` and is followed by
    two of the remaining four elements (positions 1-4), taken in increasing
    position order. Slices are used instead of indexing, so the result items
    have the same sequence type as ``inp`` and a window shorter than five
    elements simply yields shorter items instead of raising.
    """
    # All ways to pick two positions out of 1..4, in lexicographic order.
    position_pairs = (
        (1, 2),
        (1, 3),
        (1, 4),
        (2, 3),
        (2, 4),
        (3, 4),
    )
    head = inp[0:1]
    return [
        head + inp[first:first + 1] + inp[second:second + 1]
        for first, second in position_pairs
    ]
def get_hamming_distance(inp1, inp2):
    """Count the positions at which two sequences differ.

    The sequences are compared pairwise with ``zip``, so when their lengths
    differ only the common prefix is compared and the extra tail is ignored.
    """
    return sum(1 for left, right in zip(inp1, inp2) if left != right)
def main():
    """Hash every template in ``./myjson.json`` and print pairwise distances.

    The JSON file is read as a mapping of sample name to a sequence whose
    elements, joined as text, form a base-2 string. For each sample (in
    sorted name order) the function:

    1. prints the sample name;
    2. packs the bit string into big-endian bytes;
    3. slides a 5-byte circular window over 1200 byte positions, hashes each
       triplet from ``generate_triplets`` with the module-level ``hasher`` and
       counts digests in a 256-bin histogram;
    4. encodes every bin as a 2-character code (``00``/``01``/``10``/``11``)
       according to which quartile of the histogram its count falls in.

    Afterwards it prints, row by row, the Hamming distances between all pairs
    of encoded hashes, followed by a 0/1 matrix marking which pairs of raw bit
    strings differ in more than 2250 positions.
    """
    # The context manager closes the file even if JSON parsing fails.
    with open('./myjson.json') as f:
        templates = json.load(f)

    allhashed = {}
    allraw = {}

    # Sorted order keeps the rows/columns of the printed matrices stable.
    for name, template in sorted(templates.items()):
        print(name)
        bits = "".join(str(e) for e in template)
        allraw[name] = bits

        # Pack the bit string big-endian. Sizing the buffer from the bit count
        # keeps leading zero bytes; dropping them would shorten the buffer and
        # misalign the fixed 1200-byte window below.
        data = int(bits, 2).to_bytes((len(bits) + 7) // 8, "big")

        # NOTE(review): the window assumes 1200-byte (9600-bit) templates --
        # confirm against the data in myjson.json.
        accm = [0] * 256
        for start in range(1200):
            if start + 5 <= 1200:
                window = data[start:start + 5]
            else:
                # Wrap around to the beginning for the last four positions.
                window = data[start:1200] + data[0:start + 5 - 1200]
            for tri in generate_triplets(window):
                val = int(hasher.hash(tri).hexdigest(), 16)
                accm[val] += 1

        q1 = numpy.quantile(accm, 0.25)
        q2 = numpy.quantile(accm, 0.50)
        q3 = numpy.quantile(accm, 0.75)

        # Two characters per histogram bin, chosen by quartile.
        codes = []
        for count in accm:
            if count <= q1:
                codes.append('00')
            elif count <= q2:
                codes.append('01')
            elif count <= q3:
                codes.append('10')
            else:
                codes.append('11')
        allhashed[name] = "".join(codes)

    names = list(allhashed)
    # Distances between the quartile-encoded hashes.
    allhds1 = [
        [get_hamming_distance(allhashed[a], allhashed[b]) for b in names]
        for a in names
    ]
    # Distances between the raw bit strings.
    allhds2 = [
        [get_hamming_distance(allraw[a], allraw[b]) for b in names]
        for a in names
    ]

    for hds in allhds1:
        print(hds)
    # Multiplying the boolean row by 1 prints it as 0/1 integers.
    for hds in (numpy.array(allhds2) > 2250):
        print(1 * hds)
# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()