-
Notifications
You must be signed in to change notification settings - Fork 0
/
passwd_HDFS.txt
40 lines (32 loc) · 996 Bytes
/
passwd_HDFS.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
> cp /etc/passwd /home/cloudera/passwd
reducer.py
#!/usr/bin/python
"""Streaming reducer: print one "username : password" line per username.

Reads "username:password" lines from standard input, ignores any line that
does not split on ":" into exactly two fields, and writes one output line
for each run of consecutive lines that share the same username.

NOTE(review): grouping is by *consecutive* equal keys, so the input is
presumed to arrive sorted by username -- confirm against the job setup.
"""
import sys


def reduce_pairs(lines):
    """Yield (username, password) once per run of equal usernames.

    lines: any iterable of text lines in "username:password" form.

    The password reported for a run is the one on the first line of that
    run. It is recorded as soon as a new username is seen, so the very
    first username in the stream is reported with its real password rather
    than with None.
    """
    current_key = None
    current_password = None
    for line in lines:
        fields = line.strip().split(":")
        if len(fields) != 2:
            # Malformed record (no ":" or more than one): skip it.
            continue
        key, password = fields
        if key == current_key:
            # Still inside the same run; keep the first password seen.
            continue
        # Compare against None explicitly so an empty username is handled
        # the same way here as in the final flush below.
        if current_key is not None:
            yield current_key, current_password
        current_key = key
        current_password = password
    # Flush the last run, if any valid line was read at all.
    if current_key is not None:
        yield current_key, current_password


def main(stdin=None, stdout=None):
    """Run the reducer from stdin to stdout (both overridable for testing)."""
    stdin = sys.stdin if stdin is None else stdin
    stdout = sys.stdout if stdout is None else stdout
    for username, password in reduce_pairs(stdin):
        # str.format gives the same "key : value" text on Python 2 and 3;
        # print(a, ":", b) would print a tuple under Python 2.
        stdout.write("{0} : {1}\n".format(username, password))


if __name__ == "__main__":
    main()
mapper.py
#!/usr/bin/python
"""Streaming mapper: turn passwd-style lines into "username:password".

Reads lines from standard input, splits each on ":", and for every line
that has exactly seven fields writes the first two fields joined by ":".
Lines with any other field count are ignored.
"""
import sys

# Number of ":"-separated fields a line must have to be accepted
# (username, password, uid, gid, full name, home directory, login shell).
PASSWD_FIELD_COUNT = 7


def map_line(line):
    """Return "username:password" for a seven-field line, else None."""
    fields = line.strip().split(":")
    if len(fields) != PASSWD_FIELD_COUNT:
        return None
    # Only the first two fields are emitted; the rest are not needed.
    return "{0}:{1}".format(fields[0], fields[1])


def main(stdin=None, stdout=None):
    """Run the mapper from stdin to stdout (both overridable for testing)."""
    stdin = sys.stdin if stdin is None else stdin
    stdout = sys.stdout if stdout is None else stdout
    for line in stdin:
        record = map_line(line)
        if record is not None:
            stdout.write(record + "\n")


if __name__ == "__main__":
    main()
> hadoop jar /usr/lib/hadoop-mapreduce/hadoop-streaming.jar \
-input /user/cloudera/input \
-output /user/cloudera/output \
-mapper /home/cloudera/mapper.py \
-reducer /home/cloudera/reducer.py
> hdfs dfsadmin -safemode leave