file_1.py
from nltk.tokenize import word_tokenize
from nltk.corpus import wordnet
import random
import pickle
import nltk
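# The calls below rely on three NLTK data packages (an assumption about the
# local setup): 'punkt' for word_tokenize, 'averaged_perceptron_tagger' for
# nltk.pos_tag, and 'wordnet' for the synset lookups. A one-time quiet
# download is safe to run even if they are already installed.
for resource in ("punkt", "averaged_perceptron_tagger", "wordnet"):
    nltk.download(resource, quiet=True)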
# Read the review file; each line is treated as one review.
with open("battery-life_netbook_1005ha.txt") as review_file:
    read_reviews = review_file.read()

all_words = []
documents = []
# Keep only verbs (V), adverbs (R), nouns (N) and adjectives (J),
# identified by the first letter of the Penn Treebank POS tag.
allowed_word_types_1 = ["V", "R", "N", "J"]

for p in read_reviews.split('\n'):
    # Each review line is stored with a "pos" label.
    documents.append((p, "pos"))
    words = word_tokenize(p)
    pos = nltk.pos_tag(words)
    for word, tag in pos:
        if tag[0] in allowed_word_types_1:
            all_words.append(word.lower())

# FreqDist keeps one entry per distinct (lowercased) word, with its frequency.
all_words = nltk.FreqDist(all_words)
# print(all_words.most_common(3000))
# For every collected word, write the lemma names of all of its WordNet
# synsets, one lemma per line.
with open("new_dictionary_name.txt", "a") as f:
    for a in all_words:
        for synset in wordnet.synsets(a):
            for lemma in synset.lemmas():
                f.write(lemma.name() + "\n")

# Write the gloss (definition) of every synset of every collected word,
# one definition per line.
with open("new_dictionary_definition.txt", "a") as f:
    for a in all_words:
        for synset in wordnet.synsets(a):
            f.write(synset.definition() + "\n")

# Write the usage examples of every synset of every collected word,
# one synset per line (examples separated by "; ").
with open("new_dictionary_examples.txt", "a") as f:
    for a in all_words:
        for synset in wordnet.synsets(a):
            f.write("; ".join(synset.examples()) + "\n")
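# Optional sanity check (a sketch): print the 20 most frequent kept words and
# how many WordNet synsets each one has, to get a feel for what the
# dictionary files above will cover.
for word, count in all_words.most_common(20):
    print(word, count, len(wordnet.synsets(word)))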