-
Notifications
You must be signed in to change notification settings - Fork 0
/
neuralnine.py
44 lines (31 loc) · 930 Bytes
/
neuralnine.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import re
import json
from mrjob.job import MRJob
from mrjob.step import MRStep
class WordCounter(MRJob):
    """Count word occurrences across JSON review records.

    The job runs in two steps (see ``steps``):

    1. ``mapper`` / ``combiner`` / ``reducer`` tokenize each record's
       ``reviewText`` and sum the occurrences of every word.
    2. ``reducer_sorter`` receives all ``(count, word)`` pairs under a
       single key and yields them in ascending order.
    """

    # Compiled once at class creation instead of being resolved per record.
    _WORD_RE = re.compile(r"\b\w+\b")

    def mapper(self, key, value):
        """Yield ``(word, 1)`` for every word in one review's text.

        Args:
            key: Unused.
            value: One input record, a JSON document expected to be an
                object holding the review body under ``"reviewText"``.

        Yields:
            ``(token, 1)`` for each lower-cased word token in the text.

        Raises:
            ValueError: If ``value`` is not valid JSON (raised by
                ``json.loads``).
        """
        review = json.loads(value)
        # A record without a usable review body contributes no words. Skipping
        # it keeps one incomplete record from aborting the whole job with a
        # KeyError (missing key) or AttributeError (null / non-string value).
        review_text = review.get("reviewText")
        if not isinstance(review_text, str):
            return
        for token in self._WORD_RE.findall(review_text.lower()):
            yield token, 1

    def combiner(self, key, values):
        """Pre-sum the partial counts for ``key``.

        Args:
            key: A word emitted by ``mapper``.
            values: Iterable of partial counts for that word.

        Yields:
            ``(key, partial_total)``.
        """
        yield key, sum(values)

    def reducer(self, key, values):
        """Total the counts for ``key`` and re-key the result under ``None``.

        Emitting every pair under the same ``None`` key is what lets the
        second step see all words together so it can sort them.

        Args:
            key: A word.
            values: Iterable of (partial) counts for that word.

        Yields:
            ``(None, (total, key))``.
        """
        yield None, (sum(values), key)

    def reducer_sorter(self, key, values):
        """Yield every ``(count, word)`` pair in ascending order.

        Args:
            key: Unused; ``reducer`` emits everything under ``None``.
            values: Iterable of ``(count, word)`` pairs.

        Yields:
            ``(count, word)``, ordered by count and then by word, since the
            pairs are compared element-wise by ``sorted``.
        """
        # sorted() materializes all pairs in memory before anything is yielded.
        for count, _key in sorted(values):
            yield count, _key

    def steps(self):
        """Return the two-step pipeline: count the words, then sort them."""
        return [
            MRStep(
                mapper=self.mapper,
                combiner=self.combiner,
                reducer=self.reducer,
            ),
            MRStep(
                reducer=self.reducer_sorter,
            ),
        ]
if __name__ == "__main__":
    # Launch the job only when this file is executed as a script, so that
    # importing the module does not start a run.
    WordCounter.run()