install python
install jupyter
install java
sudo apt-get install scala
pip3 install py4j
download spark
unzip spark
export SPARK_HOME='/home/xiaoran/spark-2.2.0-bin-hadoop2.7'
export PATH=$SPARK_HOME/bin:$PATH
export PYTHONPATH=$SPARK_HOME/python:$PYTHONPATH
export PYSPARK_DRIVER_PYTHON="jupyter"
export PYSPARK_DRIVER_PYTHON_OPTS="notebook"
chmod 777 spark-2.2.0-bin-hadoop2.7
Inside "/home/xiaoran/spark-2.2.0-bin-hadoop2.7/python":
open a terminal and start Jupyter Notebook.
To use PySpark from any directory, not only under "/home/xiaoran/spark-2.2.0-bin-hadoop2.7/python", install findspark:
pip3 install findspark
cd spark-2.2.0-bin-hadoop2.7/
pwd
/home/xiaoran/spark-2.2.0-bin-hadoop2.7
import findspark
findspark.init('/home/xiaoran/spark-2.2.0-bin-hadoop2.7')
import pyspark
- Download new pairs.pem file: actions:terminate
- chmod 400 newaparkpair.pem
- ssh -i newaparkpair.pem ubuntu@ec2-18-216-189-3.us-east-2.compute.amazonaws.com
- you are now logged in to the EC2 instance from your computer
- download and install Spark
- Install Jupyter Notebook
- Connect with PySpark
- Access the EC2 Jupyter Notebook in our local browser
-
sudo apt-get update
-
sudo apt install python3-pip
-
pip3 install jupyter
-
sudo apt-get install default-jre (install java)
-
sudo apt-get install scala
-
pip3 install py4j
-
wget http://archive.apache.org/dist/spark/spark-2.2.0/spark-2.2.0-bin-hadoop2.7.tgz
-
sudo tar -zxvf spark-2.2.0-bin-hadoop2.7.tgz (extract Spark, pre-built for Hadoop 2.7)
-
cd spark-2.2.0-bin-hadoop2.7/
-
pwd (/home/ubuntu/spark-2.2.0-bin-hadoop2.7)
-
cd
-
pip3 install findspark
-
jupyter notebook --generate-config
-
sudo openssl req -x509 -nodes -days 365 -newkey rsa:1024 -keyout mycert.pem -out mycert.pem
-
cd ~/.jupyter/
-
vi jupyter_notebook_config.py
-
press i to enter insert mode
c = get_config()
c.NotebookApp.certfile = u'/home/ubuntu/certs/mycert.pem'
c.NotebookApp.ip = '*'
c.NotebookApp.open_browser = False
c.NotebookApp.port = 8888
hit esc to quit insert -
:wq!
cd -
jupyter notebook
http://publicDNS:8888/?token=fc4eb092caf715a25c4263f23299524c4b85e9ca95478ccf
import findspark
findspark.init('/home/ubuntu/spark-2.2.0-bin-hadoop2.7')