Simple Map Reduce package in python that supports parallel processing
pyParallelMR leverages the fact that greater speeds in computation can be achieved using more memory.
It is not applicable to problems which require optimized solutions (IoT for example).
A simple queue based map reduce engine which executes tasks in a batch wise parallel manner.
from pyMR import Master
from collections import defaultdict
# Map logic for word count
def map_wc (lines ):
word_count = defaultdict (int )
for line in lines :
for word in line .split ():
word_count [word ] += 1
return word_count
# Reduce logic for word count
def red_wc (kv1 , kv2 ):
for word , count in kv1 .items ():
kv2 [word ] += count
return kv2
def main ():
file_path = '/path/to/file'
master = Master (num_workers = 9 ) # Initialize engine
master .create_job (data = open (file_path , encoding = 'utf-8' ),
map_fn = map_wc , red_fn = red_wc ) # Submit the data
master .start () # Run the job
result = master .result ()
print (result )
if __name__ == '__main__' :
main ()
Finding primes in a given range
Word count of a document
Finding primes in a given range