# installation.sh
# Forked from goldshtn/spark-workshop.
# OpenJDK 8, installed from the openjdk-r PPA.
# -y answers the PPA confirmation prompt directly, so nothing has to be
# piped into add-apt-repository's stdin.
sudo add-apt-repository -y ppa:openjdk-r/ppa
sudo apt-get update -y
sudo apt-get install -y openjdk-8-jdk
# Spark download and setup.
# Fetch from archive.apache.org, which keeps every past release; mirror
# paths such as www-us.apache.org/dist only carry current versions.
wget https://archive.apache.org/dist/spark/spark-2.4.0/spark-2.4.0-bin-hadoop2.7.tgz -O /tmp/spark-2.4.0.tgz
sudo ufw disable
sudo mkdir -p /usr/lib/spark
# --strip-components=1 drops the top-level directory of the tarball so the
# distribution lands directly in /usr/lib/spark.
sudo tar -xf /tmp/spark-2.4.0.tgz --strip-components=1 -C /usr/lib/spark

# Persist JAVA_HOME / SPARK_HOME and their bin directories in the login
# profile. Each line is appended only if it is not already present, so
# re-running this script does not keep growing ~/.bash_profile.
# The single quotes are deliberate: $JAVA_HOME, $SPARK_HOME and $PATH must be
# written literally and expanded later, when the profile is sourced.
# shellcheck disable=SC2016
for profile_line in \
  'export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64' \
  'export PATH=$JAVA_HOME/bin:$PATH' \
  'export SPARK_HOME=/usr/lib/spark' \
  'export PATH=$SPARK_HOME/bin:$PATH'; do
  # grep -x -F: whole-line, fixed-string match; stderr is silenced because the
  # profile may not exist yet (the append below then creates it).
  grep -qxF -- "$profile_line" ~/.bash_profile 2>/dev/null \
    || printf '%s\n' "$profile_line" >> ~/.bash_profile
done
source ~/.bash_profile
# Spark log config: replace conf/log4j.properties with a WARN-level console
# configuration.
# rm -f so that a run where the file does not exist yet prints no error.
sudo rm -f /usr/lib/spark/conf/log4j.properties
# The heredoc delimiter is quoted ('EOF') so the body is written verbatim.
# With an unquoted delimiter the shell would expand $exprTyper and
# $SparkILoopInterpreter to empty strings and corrupt those logger names.
# sudo tee performs the write as root; its stdout copy is discarded.
sudo tee /usr/lib/spark/conf/log4j.properties > /dev/null << 'EOF'
# Set everything to be logged to the console
log4j.rootCategory=WARN, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
# Settings to quiet third party logs that are too verbose
log4j.logger.org.spark-project.jetty=WARN
log4j.logger.org.spark-project.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=WARN
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=WARN
log4j.logger.org.apache.parquet=ERROR
log4j.logger.parquet=ERROR
# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL
log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR
EOF
# Spark default config: point clients at the standalone master on this host
# and enable the event log.
# rm -f so that a run where the file does not exist yet prints no error.
sudo rm -f /usr/lib/spark/conf/spark-defaults.conf
# The heredoc delimiter is intentionally unquoted: $(hostname) must be
# expanded now so the file contains this machine's actual host name.
# sudo tee performs the write as root; its stdout copy is discarded.
sudo tee /usr/lib/spark/conf/spark-defaults.conf > /dev/null << EOF
spark.master spark://$(hostname):7077
spark.eventLog.enabled true
spark.eventLog.dir file:///usr/lib/spark/logs/eventlog
EOF
# Create the event-log directory named in spark.eventLog.dir above and make
# the whole logs tree writable by every user.
sudo mkdir -p /usr/lib/spark/logs/eventlog
sudo chmod -R 777 /usr/lib/spark/logs
# Zeppelin download and setup.
# The release directory in the URL must match the artifact version (0.8.0);
# archive.apache.org keeps every past release.
wget https://archive.apache.org/dist/zeppelin/zeppelin-0.8.0/zeppelin-0.8.0-bin-all.tgz -O /tmp/zeppelin-0.8.0.tgz
sudo mkdir -p /usr/lib/zeppelin
# --strip-components=1 drops the top-level directory of the tarball so the
# distribution lands directly in /usr/lib/zeppelin.
sudo tar -xf /tmp/zeppelin-0.8.0.tgz --strip-components=1 -C /usr/lib/zeppelin

# Zeppelin config: write conf/zeppelin-env.sh.
# rm -f so that a run where the file does not exist yet prints no error.
sudo rm -f /usr/lib/zeppelin/conf/zeppelin-env.sh
# The heredoc delimiter is intentionally unquoted: $(hostname) must be
# expanded now so MASTER holds this machine's actual host name.
# sudo tee performs the write as root; its stdout copy is discarded.
sudo tee /usr/lib/zeppelin/conf/zeppelin-env.sh > /dev/null << EOF
#!/bin/bash
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export MASTER=spark://$(hostname):7077
export SPARK_HOME=/usr/lib/spark
export ZEPPELIN_PORT=9995
EOF
# Keep the firewall disabled -- presumably so the Spark (7077) and Zeppelin
# (9995) ports configured above are reachable; confirm before using this
# outside a workshop machine.
sudo ufw disable
# Bring the services up: stop any Spark master/worker first, then start a
# master, attach a worker to it, and finally restart the Zeppelin daemon.
spark_sbin=/usr/lib/spark/sbin
for stop_script in stop-master.sh stop-slave.sh; do
  sudo "${spark_sbin}/${stop_script}"
done
sudo "${spark_sbin}/start-master.sh"
# The worker registers with the master URL for this host on port 7077.
sudo "${spark_sbin}/start-slave.sh" "spark://$(hostname):7077"
sudo /usr/lib/zeppelin/bin/zeppelin-daemon.sh restart