fabfile.py
from fabric.api import env, task, local, run, sudo, parallel
from fabric.operations import put
from fabric.contrib.files import exists, contains, append, sed, upload_template, comment, uncomment
from fabric.utils import error, abort
from fabric.context_managers import cd, settings, hide
from fabric.decorators import hosts
import os, io
from pipes import quote
from crypt import crypt
INSTALL_FILES = './temp_files'
PUBKEY = os.path.join(os.environ['HOME'], '.ssh/id_rsa.pub')
HADOOP_VERSION = '2.8.1'
SPARK_VERSION = '2.2.0'
SPARK_HADOOP_COMPAT = '2.7'
HADOOP_TARFILE = 'hadoop-%s.tar.gz' % (HADOOP_VERSION,)
HADOOP_APACHE_PATH = '/hadoop/common/hadoop-%s/%s' % (HADOOP_VERSION, HADOOP_TARFILE)
HADOOP_INSTALL = '/opt/hadoop-%s' % (HADOOP_VERSION,)
SPARK_TARFILE = 'spark-%s-bin-hadoop%s.tgz' % (SPARK_VERSION, SPARK_HADOOP_COMPAT)
SPARK_APACHE_PATH = 'spark/spark-%s/%s' % (SPARK_VERSION, SPARK_TARFILE)
SPARK_INSTALL = '/opt/spark-%s-bin-hadoop%s' % (SPARK_VERSION, SPARK_HADOOP_COMPAT)
NUM_SLAVES = 6
SLAVES = ['hadoop%i.local' % (i) for i in range(1, NUM_SLAVES+1)]
HOSTS = ['master.local'] + SLAVES
if not env.hosts:
    env.hosts = HOSTS
env.user = 'pi'
env.skip_bad_hosts = True
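
# Rough workflow (a sketch inferred from the tasks below, not a fixed recipe):
#   1. per freshly imaged Pi:  fab -H <address> --set=hostname=<name> set_hostname
#   2. once, locally:          fab fetch_files
#   3. across the cluster:     fab auth_config node_config ssh_keyscan install_hadoop install_spark
# With no -H/-R arguments, tasks run as user 'pi' against master.local and the
# hadoopN.local slaves defined above.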

def cmd(c, *args):
    """
    Helper to escape command line arguments in a shell command
    """
    return c % tuple(map(quote, args))

def install_file(f):
    """
    Local path for an install file.
    """
    return os.path.join(INSTALL_FILES, f)

@task
def auth_config():
    """
    Set up SSH keys for the pi@ user
    """
    local(cmd('mkdir -p %s', INSTALL_FILES))
    # user SSH key
    if not os.path.isfile(install_file('id_rsa')):
        local(cmd("ssh-keygen -t rsa -b 4096 -N '' -C 'cluster user key' -f %s", install_file('id_rsa')))
    run('mkdir -p -m 0700 .ssh')
    if not exists('.ssh/id_rsa'):
        put(install_file('id_rsa.pub'), '.ssh/id_rsa.pub', mode=0o0600)
        put(install_file('id_rsa'), '.ssh/id_rsa', mode=0o0600)
    # let the cluster user key log in as pi@ on this node...
    pubkey = open(install_file('id_rsa.pub'), 'r').read()
    if not contains('.ssh/authorized_keys', pubkey):
        append('.ssh/authorized_keys', pubkey)
    # ...and the admin's own public key as well
    pubkey = open(PUBKEY, 'r').read()
    if not contains('~/.ssh/authorized_keys', pubkey, exact=True):
        append('.ssh/authorized_keys', pubkey)

@task
def clean_raspbian():
    """
    Uninstall stuff we don't need from Raspbian.
    """
    # based on https://gist.github.com/bivald/4182851 and forks
    sudo('apt-get update')
    sudo('''apt-get -y purge xserver* x11-utils x11-xkb-utils x11-xserver-utils xarchiver xauth xkb-data console-setup xinit lightdm \
        obconf openbox alsa* python-pygame python-tk python3-tk scratch tsconf aspell hunspell-en-us iptraf libaspell15 \
        libhunspell-1.2-0 lxde lxsession lxtask lxterminal squeak-vm zenity gdm gnome-themes-standard python-pygame \
        desktop-file-utils omxplayer xserver-xorg ^lx samba-common smbclient cups-bsd cups-client cups-common \
        wolfram-engine cifs-utils samba-common \
        libsysfs2 gstreamer* libident libboost* libsigc++* x2x fbset libfreetype6-dev libept-dev gtk2-engines gpicview \
        gnome-themes-standard-data gnome-icon-theme galculator python3-picamera \
        python3-serial python-picamera xpdf timidity sonic-pi \
        python3-rpi.gpio python-rpi.gpio v4l-utils xdg-utils minecraft-pi libept* \
        smartsim luajit libapt-pkg-dev libtagcoll2-dev libxapian* raspberrypi-artwork libudev0
        ''')
    sudo('apt-get -y autoremove')
    sudo('apt-get -y dist-upgrade')
    sudo('apt-get -y autoclean')
    sudo('apt-get -y clean')
    run('rm -rf /home/pi/python_games')

def _get_apache_file(path, tarfile):
    if not os.path.isfile(install_file(tarfile)):
        local(cmd('%s %s -O %s', install_file('grrrr'), path, install_file(tarfile)))

@task
def fetch_files():
    """
    Download the Hadoop and Spark tarballs (and the grrr helper script) into the
    local install-files cache.
    """
    if not os.path.isdir(INSTALL_FILES):
        local(cmd('mkdir %s', INSTALL_FILES))
    grrrr = install_file('grrrr')
    if not os.path.isfile(grrrr):
        local(cmd('wget --no-check-certificate https://raw.githubusercontent.com/fs111/grrrr/master/grrr -O %s && chmod +x %s', grrrr, grrrr))
    _get_apache_file(HADOOP_APACHE_PATH, HADOOP_TARFILE)
    _get_apache_file(SPARK_APACHE_PATH, SPARK_TARFILE)
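
# fetch_files runs entirely on the control machine: the grrr script resolves the
# Apache paths above to a download mirror, and the tarballs are cached under
# ./temp_files so they are fetched once and then pushed to every node.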

@task
def set_hostname():
    """
    Set the hostname on a node and reboot it (target a single host with -H).
    """
    try:
        hostname = env['hostname']
    except KeyError:
        error('Must specify --set=hostname=<newhostname> on command line', abort)
    sudo(cmd('echo %s > /etc/hostname', hostname))
    sed('/etc/hosts', r'127.0.1.1\s+.*', '127.0.1.1 ' + hostname, use_sudo=True)
    # make sure we have a unique SSH signature on this new node
    sudo('rm /etc/ssh/ssh_host_*')
    sudo('dpkg-reconfigure openssh-server')
    sudo('reboot')
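
# Example invocation (host name and target are placeholders):
#   fab -H raspberrypi.local --set=hostname=hadoop1 set_hostname
# The node reboots at the end so the new hostname and freshly generated SSH host
# keys take effect.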

def failure(cmd, use_sudo=False, shell=False):
    """Run a command remotely (output hidden) and return True if it failed."""
    func = sudo if use_sudo else run
    with settings(hide('everything'), warn_only=True):
        return not func(cmd, shell=shell).succeeded

@task
@parallel
def node_config():
    """
    Basic system setup
    """
    if not exists('/usr/sbin/ntpdate'):
        sudo('apt-get -y install ntpdate')
    if not exists('/usr/bin/sshfs'):
        # handy to copy files onto the cluster
        sudo('apt-get -y install sshfs')
    #put('files/interfaces', '/etc/network/interfaces', use_sudo=True)
    upload_template('files/hadoop.sh', '/etc/profile.d/hadoop.sh', context={'hadoop_home': HADOOP_INSTALL}, use_sudo=True)
    run('mkdir -m 0755 -p ~/bin')
    upload_template('files/exec-all.sh', 'bin/exec-all', context={'slaves_list': ' '.join(SLAVES)}, mode=0o0755)
    if exists('python_games'):
        run('rm -rf python_games')
    if not exists('/usr/share/pam-configs/systemd'):
        sudo('apt-get -y install libpam-systemd')

    # Hadoop user
    if failure('egrep -q "^hadoop:" /etc/passwd'):
        sudo('adduser --system --shell=/bin/bash --home /home/hadoop --group --disabled-password hadoop')
        #sudo('chsh -s /bin/bash hadoop')
    sudo('grep -q "^supergroup" /etc/group || groupadd supergroup')
    sudo('usermod -a -G supergroup pi')
    sudo('usermod -a -G supergroup hadoop')

    # mount USB key (formatted "mkfs.ext4 -L HADOOP")
    append('/etc/fstab', 'LABEL=HADOOP /hadoop ext4 defaults,relatime,noauto 0 0', use_sudo=True)
    append('/etc/rc.local', 'mount /hadoop || true', use_sudo=True)
    append('/etc/rc.local', 'chown hadoop:hadoop /hadoop || true', use_sudo=True)
    comment('/etc/rc.local', '^exit 0', use_sudo=True)
    sudo('mkdir -p /hadoop')
    sudo('chown hadoop:hadoop /hadoop')
    sudo('/etc/rc.local')

    # SSH keys
    if not os.path.isfile(install_file('hadoop_id_rsa')):
        local(cmd("ssh-keygen -t rsa -b 4096 -N '' -C 'cluster root key' -f %s", install_file('hadoop_id_rsa')))
    sudo('mkdir -p -m 0700 /home/hadoop/.ssh')
    upload_template('files/ssh-config', '.ssh/config', context={'host_list': ' '.join(HOSTS)}, mode=0o0600)
    if not exists('/home/hadoop/.ssh/id_rsa'):
        put(install_file('hadoop_id_rsa.pub'), '/home/hadoop/.ssh/id_rsa.pub', mode=0o0644, use_sudo=True)
        put(install_file('hadoop_id_rsa.pub'), '/home/hadoop/.ssh/authorized_keys', mode=0o0644, use_sudo=True)
        put(install_file('hadoop_id_rsa'), '/home/hadoop/.ssh/id_rsa', mode=0o0600, use_sudo=True)
    sudo('chown -R hadoop:hadoop /home/hadoop/.ssh')

    # /etc/hosts dynamic reconfig (needed by zookeeper)
    #if not exists('/etc/hosts.template'):
    #    sudo('(grep -v 127.0.1.1 /etc/hosts | grep -v "# auto"; echo "HOST # auto") > /etc/hosts.template')
    #put('files/update-hosts.sh', '/etc/network/if-up.d/update-hosts.sh', mode=0o0755, use_sudo=True)

    # Java
    if not exists('/usr/bin/java'):
        sudo('apt-get -y install oracle-java8-jdk')
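
# Notes on node_config: it assumes each node has a USB key labelled HADOOP
# (formatted with "mkfs.ext4 -L HADOOP") for the /hadoop mount, and that the
# local files/ directory holds the hadoop.sh, exec-all.sh and ssh-config
# templates. Because the task runs @parallel, the local ssh-keygen call for
# hadoop_id_rsa can race between worker processes; generating that key once
# (e.g. by running the task against a single host first) avoids this.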

ssh_keys_cache = None

def collect_ssh_keys():
    """
    Gather SSH host keys for all cluster hosts with ssh-keyscan (cached per run).
    """
    global ssh_keys_cache
    if ssh_keys_cache:
        return ssh_keys_cache
    ssh_keys = []
    for h in HOSTS:
        with settings(warn_only=True):
            key = local(cmd('ssh-keyscan %s', h), capture=True)
            if key:
                ssh_keys.append(key)
    ssh_keys_cache = '\n'.join(ssh_keys) + '\n'
    return ssh_keys_cache

@task
def ssh_keyscan():
    """
    Install the collected host keys as known_hosts for the hadoop and pi users.
    """
    ssh_keys = collect_ssh_keys()
    keydata = io.StringIO(ssh_keys)
    put(keydata, '/home/hadoop/.ssh/known_hosts', use_sudo=True)
    sudo('chown hadoop.hadoop /home/hadoop/.ssh/known_hosts')
    run('mkdir -p .ssh && chmod 0700 .ssh')
    put(keydata, '.ssh/known_hosts')
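
# ssh-keyscan runs from the control machine, so every host in HOSTS has to be up
# and reachable when ssh_keyscan is invoked; hosts that do not answer are skipped
# (warn_only) and simply end up missing from known_hosts.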

@task
@parallel
def install_hadoop():
    """
    Unpack Hadoop under /opt and push the cluster configuration to each node.
    """
    # Hadoop
    sudo('mkdir -p /opt')
    if not exists(os.path.join(HADOOP_INSTALL, 'bin/hadoop')):
        put(install_file(HADOOP_TARFILE), os.path.join('/opt', HADOOP_TARFILE), use_sudo=True)
        with cd('/opt'):
            sudo(cmd('tar zxf %s', HADOOP_TARFILE))
        sudo(cmd('rm %s', os.path.join('/opt', HADOOP_TARFILE)))
        sudo(cmd('chown -R hadoop.hadoop %s', HADOOP_INSTALL))

    # Hadoop config files
    put('files/core-site.xml', '%s/etc/hadoop/core-site.xml' % (HADOOP_INSTALL,), use_sudo=True)
    put('files/hdfs-site.xml', '%s/etc/hadoop/hdfs-site.xml' % (HADOOP_INSTALL,), use_sudo=True)
    put('files/yarn-site.xml', '%s/etc/hadoop/yarn-site.xml' % (HADOOP_INSTALL,), use_sudo=True)
    put('files/mapred-site.xml', '%s/etc/hadoop/mapred-site.xml' % (HADOOP_INSTALL,), use_sudo=True)
    put('files/masters', '%s/etc/hadoop/masters' % (HADOOP_INSTALL,), use_sudo=True)
    upload_template('files/slaves', '%s/etc/hadoop/slaves' % (HADOOP_INSTALL,), context={'slaves_list': '\n'.join(SLAVES)}, use_sudo=True)

    hadoop_env = '%s/etc/hadoop/hadoop-env.sh' % (HADOOP_INSTALL,)
    sed(hadoop_env, '^export JAVA_HOME=.*', 'export JAVA_HOME=$(readlink -f /usr/bin/java | sed "s:bin/java::")', use_sudo=True)
    uncomment(hadoop_env, r'export\s+HADOOP_HEAPSIZE=', use_sudo=True)
    sed(hadoop_env, r'^export\s+HADOOP_HEAPSIZE=.*', 'export HADOOP_HEAPSIZE=256', use_sudo=True)
    if not contains(hadoop_env, '^export HADOOP_DATANODE_OPTS=.*-client', escape=False, use_sudo=True):
        # add -client so the datanode runs the lighter client JVM
        sed(hadoop_env, '^export HADOOP_DATANODE_OPTS="(.*)"$', 'export HADOOP_DATANODE_OPTS="\\1 -client"', use_sudo=True)
    sudo(cmd('ln -sf %s /opt/hadoop', HADOOP_INSTALL))

    for f in ['start-all.sh', 'stop-all.sh', 'dfs-format.sh', 'clear-dfs.sh', 'nuke-dfs.sh', 'halt-all.sh', 'hdfs-balance.sh']:
        put('files/' + f, 'bin/' + f.replace('.sh', ''), mode=0o0755)

    # HDFS directories
    sudo('mkdir -p -m 0750 /hadoop/tmp && chown hadoop:hadoop /hadoop/tmp')
    sudo('mkdir -p /hadoop/namenode && chown hadoop:hadoop /hadoop/namenode')
    sudo('mkdir -p /hadoop/datanode && chown hadoop:hadoop /hadoop/datanode')
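
# install_hadoop assumes fetch_files has already cached the Hadoop tarball under
# ./temp_files, and that files/ contains the *-site.xml templates, the masters
# and slaves files, and the start/stop helper scripts copied into ~/bin above.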

@task
@parallel
def install_spark():
    """
    Unpack Spark under /opt and push its configuration to each node.
    """
    if not exists(os.path.join(SPARK_INSTALL, 'bin/spark-submit')):
        put(install_file(SPARK_TARFILE), os.path.join('/opt', SPARK_TARFILE), use_sudo=True)
        with cd('/opt'):
            sudo(cmd('tar zxf %s', SPARK_TARFILE))
        sudo(cmd('rm %s', os.path.join('/opt', SPARK_TARFILE)))
        sudo(cmd('chown -R hadoop.hadoop %s', SPARK_INSTALL))
    sudo(cmd('ln -sf %s /opt/spark', SPARK_INSTALL))
    upload_template('files/spark-defaults.conf', '%s/conf/spark-defaults.conf' % (SPARK_INSTALL,), context={'spark_install': SPARK_INSTALL}, use_sudo=True)
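
# Like install_hadoop, this expects fetch_files to have cached the Spark tarball
# locally first; the /opt/spark symlink gives a version-independent path
# alongside /opt/hadoop.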

@task
@hosts('master.local')
def course_prep():
    """
    Prep specific to SFU CMPT 732
    """
    put('files/Makefile', 'Makefile')

@task
@parallel
def change_password():
    """
    Change the password on the pi@ accounts
    """
    try:
        passwd = env['passwd']
    except KeyError:
        error('Must specify --set=passwd="<newpasswd>" on command line', abort)
    cpw = crypt(passwd, 'mmmsalt')
    sudo(cmd('usermod --password %s pi', cpw))
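
# Example: fab --set=passwd='<newpasswd>' change_password
# Note that crypt() with the fixed salt 'mmmsalt' yields a legacy DES hash (only
# the first two salt characters are used), so this is a convenience for a lab
# cluster rather than strong password hashing.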

@task
def send_cmd():
    """
    Run an arbitrary command with sudo on the hosts.
    """
    try:
        cmd = env['cmd']
    except KeyError:
        error('Must specify --set=cmd="the_command to run" on command line', abort)
    with settings(warn_only=True):
        sudo(cmd)
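
# Example: fab --set=cmd='uptime' send_cmd
# The command runs with sudo on every host in env.hosts; warn_only means one
# failing node does not abort the run on the others.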