-
Notifications
You must be signed in to change notification settings - Fork 0
/
cryptsession.py
215 lines (201 loc) · 9.63 KB
/
cryptsession.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
# -*- coding: utf-8 -*-
# Author: Tilman Bender [email protected]
import string
import pprint
import re
import sys
from mapper import Mapper
from reference import ReferenceModel
from analysis import AnalysisModel
class Cryptsession(object):
    """Interactive console session for working on a substitution cipher.

    A session ties together three collaborators:

    * ``reference``  -- a ``ReferenceModel`` with the letter frequencies of
      the assumed plaintext language (English until a ciphertext is loaded),
    * ``ciphertext`` -- an ``AnalysisModel`` built from the ciphertext file,
      or ``None`` as long as no file has been loaded (menu option 0),
    * ``mapper``     -- a ``Mapper`` holding the current
      ciphertext -> plaintext substitution rules.

    The module targets Python 2 (``raw_input``, byte-string input decoding).
    ``print`` is always called with a single argument so the output is the
    same whether or not ``print_function`` is in effect.
    """

    # Shown whenever an action needs a ciphertext but none has been loaded.
    _NO_CIPHERTEXT = "No ciphertext loaded yet - choose option [0] first"

    # Menu options that read ``self.ciphertext`` and therefore require a
    # loaded ciphertext.
    _NEEDS_CIPHERTEXT = frozenset((1, 4, 5, 6, 7, 8, 9, 13, 16))

    # Menu text, one entry per printed line. Options 14-16 are accepted by
    # show_menu() but were never listed here; they are left unlisted.
    _MENU_LINES = (
        "======== Available Actions ========",
        "[0] Read ciphertext from file",
        "[1] Show ciphertext",
        "[3] Show reference frequencies (symbols)",
        # TODO: show absolute frequencies
        "[4] Show ciphertext frequencies (symbols)",
        "[5] Shwo n most frequent symbols",
        "[6] Show n most frequent bigrams",
        "[7] Show n most frequent trigrams",
        "[8] Show n most frequent words",
        "[9] Create n substitution rules using symbol-frequencies ",
        "[10] Define substitution rule for ciphertext -> plaintext",
        "[11] Remove substitution rule",
        "[12] Show substitution rules",
        "[13] Show decrypted text (uses substitution rules)",
        "[q] Quit",
        "===================================",
    )

    def __init__(self):
        """Create a session with the English reference model and no ciphertext."""
        # TODO: move frequency handling into two models of the same type,
        # one for the reference language and one for the ciphertext.
        self.reference = ReferenceModel.for_language("english")
        # Set by menu option 0; None means "nothing loaded yet".
        self.ciphertext = None
        # Assumed mappings between ciphertext symbols and plaintext symbols.
        self.mapper = Mapper()

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    def _has_ciphertext(self):
        """Return True if a ciphertext is loaded; otherwise print a hint."""
        if getattr(self, "ciphertext", None) is None:
            print(self._NO_CIPHERTEXT)
            return False
        return True

    @staticmethod
    def _top(entries, n):
        """Return at most ``n`` entries, ordered by descending ``entry[1]``.

        Slicing (instead of indexing ``0..n-1``) keeps a request for more
        entries than exist from raising IndexError; a negative ``n`` yields
        an empty list.
        """
        ranked = sorted(entries, key=lambda entry: entry[1], reverse=True)
        return ranked[:max(n, 0)]

    def _translate(self, symbols):
        """Apply the current substitution rules to ``symbols``.

        Returns a tuple ``(plain, unknown)`` where ``plain`` contains the
        mapped symbol for every position that has a rule and ``u"?"`` for
        every position that has none, and ``unknown`` is the number of
        positions without a rule.
        """
        plain = []
        unknown = 0
        for symbol in symbols:
            if self.mapper.has_mapping_from(symbol):
                plain.append(self.mapper.get_mapping_from(symbol))
            else:
                plain.append(u"?")
                unknown += 1
        return u"".join(plain), unknown

    def _show_ranking(self, heading, entries, n, max_unknown):
        """Print ``heading`` followed by the ``n`` highest-valued entries.

        ``entries`` is an iterable of ``(text, value)`` pairs. The partially
        decrypted form of ``text`` is appended to a line only if the number
        of unmapped symbols does not exceed ``max_unknown(len(text))``.
        """
        print(heading)
        for entry in self._top(entries, n):
            text, value = entry[0], entry[1]
            out = u"{} ({:.2f} %)".format(text.upper(), value)
            plain, unknown = self._translate(text)
            if unknown <= max_unknown(len(text)):
                out += u" --> {}".format(plain.lower())
            print(out)

    def _ask(self, message):
        """Prompt with ``message`` and return the answer as unicode text.

        Input arrives on stdin, so stdin's encoding is used for decoding;
        it is ``None`` when stdin is not a terminal, hence the UTF-8
        fallback.
        """
        answer = raw_input(message)
        if isinstance(answer, bytes):
            answer = answer.decode(sys.stdin.encoding or "utf-8")
        return answer

    def _ask_count(self):
        """Prompt for ``n``; return it as int, or None if it is not a number."""
        raw = self._ask("n: ")
        try:
            return int(raw)
        except ValueError:
            print(u"Not a number: {}".format(raw))
            return None

    def _load_ciphertext(self):
        """Ask for a file and language, load both models, show first stats."""
        path = raw_input("Path to ciphertext: ")
        language = raw_input("Language of ciphertext (german/english): ")
        try:
            reference = ReferenceModel.for_language(language)
            ciphertext = AnalysisModel.from_file(path, reference)
        except EnvironmentError as err:
            # Keep the previous session state if the file cannot be read.
            print("Could not load ciphertext: %s" % err)
            return
        # Assign only after both loads succeeded so the reference model and
        # the ciphertext never get out of sync.
        self.reference = reference
        self.ciphertext = ciphertext
        self.show_most_frequent_symbols()
        self.show_most_frequent_bigrams()
        self.show_most_frequent_trigrams()

    def _run_option(self, option):
        """Execute the numeric menu option ``option``."""
        if option in self._NEEDS_CIPHERTEXT and not self._has_ciphertext():
            return

        # Options that first ask the user for a count ``n``.
        counted = {
            5: self.show_most_frequent_symbols,
            6: self.show_most_frequent_bigrams,
            7: self.show_most_frequent_trigrams,
            8: self.show_most_frequent_words,
            9: lambda n: self.mapper.generate_mappings(
                self.reference, self.ciphertext, n),
        }

        if option == 0:
            self._load_ciphertext()
        elif option == 1:
            self.ciphertext.show_text()
        # Option 2 used to call a non-existent ``analyze`` method; the
        # analysis output is printed when a file is loaded (option 0), so 2
        # is reported as unknown.
        elif option == 3:
            self.reference.show_letter_freqs()
        elif option == 4:
            self.ciphertext.show_letter_freqs()
        elif option in counted:
            n = self._ask_count()
            if n is not None:
                counted[option](n)
        elif option == 10:
            ciph = self._ask("From: ")
            plain = self._ask("To: ")
            self.mapper.add_mapping(ciph, plain)
        elif option == 11:
            ciph = self._ask("Remove substitution for which letter?")
            self.mapper.remove_mapping(ciph)
        elif option == 12:
            self.mapper.show_mappings()
        elif option == 13:
            self.show_plaintext()
        elif option == 14:
            self.mapper.load_mappings(self._ask("filename: "))
        elif option == 15:
            self.mapper.store_mappings(self._ask("filename: "))
        elif option == 16:
            self.mapper.generate_candidates(self.reference, self.ciphertext)
        else:
            print("Unknown option")

    # ------------------------------------------------------------------
    # public API
    # ------------------------------------------------------------------
    def show_most_frequent_symbols(self, n=5):
        """Print the ``n`` most frequent ciphertext symbols.

        If a substitution rule exists for a symbol, the mapped plaintext
        letter and its reference frequency are appended to the line.
        """
        if not self._has_ciphertext():
            return
        print("=== %d most frequent symbols ===" % n)
        for entry in self._top(self.ciphertext.get_letter_freqs(), n):
            symbol, freq = entry[0], entry[1]
            out = u"{} ({:.2f} %)".format(symbol.upper(), freq)
            # if there is a known mapping, print it
            if self.mapper.has_mapping_from(symbol):
                plain = self.mapper.get_mapping_from(symbol)
                out += u" --> {} ({:.2f} %)".format(
                    plain.lower(), self.reference.get_letter_freq(plain))
            print(out)

    def show_most_frequent_bigrams(self, n=5):
        """Print the ``n`` most frequent ciphertext bigrams.

        The partially decrypted bigram is shown as soon as at least one of
        its symbols has a substitution rule.
        """
        if not self._has_ciphertext():
            return
        self._show_ranking(
            "=== %d most frequent bigrams ===" % n,
            self.ciphertext.get_bigram_freqs(),
            n,
            lambda length: length - 1,
        )

    def show_most_frequent_trigrams(self, n=5):
        """Print the ``n`` most frequent ciphertext trigrams.

        The partially decrypted trigram is shown as soon as at least one of
        its symbols has a substitution rule.
        """
        if not self._has_ciphertext():
            return
        self._show_ranking(
            "=== %d most freqent trigrams ===" % n,
            self.ciphertext.get_trigram_freqs(),
            n,
            lambda length: length - 1,
        )

    def show_most_frequent_words(self, n=5):
        """Print the ``n`` most frequent ciphertext words.

        The partially decrypted word is shown only if no more than half of
        its symbols (rounded down) lack a substitution rule.
        """
        if not self._has_ciphertext():
            return
        # NOTE(review): the original read ``self.c.word_counts``, but no
        # attribute ``c`` is ever set. This assumes the AnalysisModel exposes
        # a ``word_counts`` dict -- confirm against analysis.py. The values
        # are printed with the same "%" suffix as before even though the
        # name suggests absolute counts.
        self._show_ranking(
            "=== %d most frequent words ===" % n,
            self.ciphertext.word_counts.items(),
            n,
            lambda length: length // 2,
        )

    def show_plaintext(self):
        """Print the ciphertext with all known substitution rules applied.

        Symbols with a rule are replaced by the lower-cased plaintext
        symbol; all other symbols are copied unchanged.
        """
        if not self._has_ciphertext():
            return
        decrypted = []
        for symbol in self.ciphertext.text:
            if self.mapper.has_mapping_from(symbol):
                decrypted.append(self.mapper.get_mapping_from(symbol).lower())
            else:
                # no rule: keep the ciphertext symbol
                decrypted.append(symbol)
        print("".join(decrypted))

    def show_menu(self):
        """Run the interactive menu loop until the user enters ``q``.

        The choice is read as plain text and parsed explicitly instead of
        being evaluated as a Python expression. Anything that is neither
        ``q`` nor a known option number prints "Unknown option".
        """
        while True:
            for line in self._MENU_LINES:
                print(line)
            choice = raw_input("Please choose: ").strip()
            if choice == "q":
                sys.exit(0)
            try:
                option = int(choice)
            except ValueError:
                print("Unknown option")
                continue
            self._run_option(option)
# Start the interactive menu only when this file is executed as a script,
# so that importing the module does not block in the menu loop.
if __name__ == "__main__":
    session1 = Cryptsession()
    session1.show_menu()