-
Notifications
You must be signed in to change notification settings - Fork 0
/
mimic.py
105 lines (77 loc) · 2.85 KB
/
mimic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
'''
Created on Mar 24, 2014
@author: bonino
Copyright (c) 2014 Dario Bonino
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License
'''
import string, re, random
def build_mimic(text):
    '''
    Builds a mimic dictionary from the given text.

    The text is converted to lower case and split into words, a word being
    a maximal run of word characters (letters, digits, underscore); any
    other character only acts as a separator.

    text: the text to analyze

    Returns a dictionary mapping every word that is followed by another
    word to the list of the words following it, in order of appearance.
    A follower is listed once per occurrence, so frequent followers are
    proportionally more likely to be picked at random from the list.
    The last word of the text gets an entry only if it also appears
    earlier; a text with fewer than two words gives an empty dictionary.
    '''
    # lower-case through the str method (string.lower() exists only in
    # Python 2) and extract the words directly: splitting a cleaned string
    # on " " would yield empty "words" whenever the text starts or ends
    # with a separator
    words = re.findall(r"\w+", text.lower())
    # the mimic vocabulary
    mimic_vocabulary = {}
    # walk over every pair of adjacent words
    for current_word, following_word in zip(words, words[1:]):
        # create the follower list on first sight, then update it
        mimic_vocabulary.setdefault(current_word, []).append(following_word)
    return mimic_vocabulary
def get_mimic_text(mimic_vocabulary, seed_word, word_count):
    '''
    Given a seed word, a word count and a mimic vocabulary generates a
    mimicked text starting from the given seed_word.

    At every step the next word is picked at random among the words that
    mimic_vocabulary associates to the last generated word.

    mimic_vocabulary: dictionary mapping a word to the list of the words
                      that may follow it
    seed_word: the first word of the generated text
    word_count: the maximum number of words appended after seed_word

    Returns the generated words joined by single spaces: seed_word followed
    by at most word_count words. Generation stops earlier as soon as the
    last word has no followers in the vocabulary, therefore an unknown
    seed_word is returned alone.
    '''
    # collect the words in a list and join them once at the end
    generated_words = [seed_word]
    # get the next seeds
    next_seeds = mimic_vocabulary.get(seed_word)
    # counter of the words appended after the seed word
    appended = 0
    # continue while words are still required and there are seeds; an empty
    # list is treated like a missing entry, as random.choice() would raise
    # IndexError on it
    while appended < word_count and next_seeds:
        # get the next seed
        seed = random.choice(next_seeds)
        # compose the mimicked text
        generated_words.append(seed)
        # get the next set of following words
        next_seeds = mimic_vocabulary.get(seed)
        # update the counter
        appended += 1
    return " ".join(generated_words)
if __name__ == '__main__':
    # raw_input() exists only in Python 2, on Python 3 the same behavior
    # is provided by input()
    try:
        read_seed = raw_input
    except NameError:
        read_seed = input
    # open the file and build the mimic vocabulary; the with statement
    # closes the file as soon as its content has been read
    with open("alice.txt") as txt_file:
        mimic_voc = build_mimic(txt_file.read())
    # read seeds and generate texts until exit
    while True:
        # get the seed word
        seed = read_seed("Insert seed:\n>")
        # leave before generating, so that no text is printed for "exit"
        if seed == "exit":
            break
        # print the mimicked text; with a single argument this form works
        # on both Python 2 and Python 3
        print(get_mimic_text(mimic_voc, seed, 40))