# (document-site navigation artifacts, translated: Academic / Professional / Culture documents)
# Hadoop + Spark cluster setup notes
# using ubuntu 18.04
#1 Map cluster hostnames to static IPs on EVERY node (master and workers).
sudo vi /etc/hosts
# The next three lines are the contents to add to /etc/hosts — they are NOT shell commands:
172.20.10.4 server
172.20.10.5 slave1
172.20.10.6 slave2
#2 vi /etc/netplan/50-cloud-init.yml
# Switch the interface to a static IP. Netplan is YAML: indentation is
# significant, and "version" belongs under "network:". Apply with:
#   sudo netplan apply
network:
  version: 2
  ethernets:
    enp0s3:
      dhcp4: no
      addresses: [172.20.10.4/24]
      gateway4: 172.20.10.1
      nameservers:
        addresses: [8.8.8.8,8.8.4.4]
#6 Install Java 8 (Hadoop 3.1 / Spark 2.4 run on JDK 8).
# -y makes apt non-interactive so these notes can be run as a script.
sudo apt install -y openjdk-8-jdk
# List registered JDKs to confirm the install succeeded.
update-java-alternatives -l
#7 download hadoop
curl -O http://mirror.cogentco.com/pub/apache/hadoop/common/hadoop-3.1.1/hadoop-
3.1.1.tar.gz
tar -xzf hadoop-3.1.2.tar.gz
sudo mv hadoop-3.1.2 /usr/local/hadoop
mkdir -p /home/ziyati/hadoop_tmp/{data,name}
rm hadoop-3.1.2.tar.gz
source ~/.bashrc
#8 download spark
curl -O https://www-eu.apache.org/dist/spark/spark-2.4.0/spark-2.4.0-bin-
hadoop2.7.tgz
tar -xzf spark-2.4.3-bin-hadoop2.7.tgz
sudo mv spark-2.4.3-bin-hadoop2.7 /usr/local/spark
rm spark-2.4.0-bin-hadoop2.7.tgz
# After changing an IP address, update /etc/hosts and the Hadoop/Spark
# configuration files on every node accordingly.
# Format HDFS — run ONCE on the master; this wipes any existing NameNode metadata.
hdfs namenode -format
# Web UIs to verify the cluster (Hadoop 3.x defaults) — open in a browser, not the shell:
http://172.20.10.4:9870
http://172.20.10.4:8088
#start the Spark standalone cluster (master + workers) via its bundled scripts
cd /usr/local/spark/sbin
./start-all.sh
# Spark master web UI — open in a browser, not the shell:
http://172.20.10.4:8080
# Spark should be launched from the master node.
############
# Thanks !
############
# Installing findspark
# findspark is a Python library that automatically allows you to import and
# use PySpark like any other Python library.