This project uses Docker, Kubernetes (via minikube), Scala 2.12.15, Spark 3.2.0, and Hadoop 3.3.1.
- Start Docker
- Start Spark on Kubernetes
# Start minikube with enough resources for the Spark master and executors
minikube start --memory 8192 --cpus 4
# Point this shell's docker CLI at minikube's Docker daemon so the image
# built below lands inside the cluster. The original used 'echo', which
# only PRINTS the export statements — 'eval' is required to apply them.
eval "$(minikube docker-env)"
# Build Docker image
sh build_docker_image.sh
sh start_spark_kubernetes.sh
# Expose the Spark UIs through the minikube ingress controller
minikube addons enable ingress
kubectl apply -f ./kubernetes/minikube-ingress.yaml
# Map the ingress hostnames to the minikube IP. Guard with grep so
# repeated runs don't append duplicate entries to /etc/hosts.
grep -q 'spark-master-ui' /etc/hosts || echo "$(minikube ip) spark-master-ui" | sudo tee -a /etc/hosts
grep -q 'spark-history-ui' /etc/hosts || echo "$(minikube ip) spark-history-ui" | sudo tee -a /etc/hosts
Spark master UI: http://spark-master-ui/
Spark history UI: http://spark-history-ui/
# Run pyspark on master
# The pod name (e.g. spark-master-55f794b67-h4g6j) and its IP are
# regenerated on every deployment — look them up instead of hard-coding.
# NOTE(review): assumes the deployment labels the master pod app=spark-master — confirm
MASTER_POD=$(kubectl get pods -l app=spark-master -o jsonpath='{.items[0].metadata.name}')
MASTER_IP=$(kubectl get pod "$MASTER_POD" -o jsonpath='{.status.podIP}')
kubectl exec "$MASTER_POD" -it -- \
pyspark --conf "spark.driver.bindAddress=$MASTER_IP" --conf "spark.driver.host=$MASTER_IP"
# Run spark-shell on master
# Resolve the master pod name and IP dynamically — the hard-coded
# replicaset suffix and pod IP change on every redeploy.
# NOTE(review): assumes the deployment labels the master pod app=spark-master — confirm
MASTER_POD=$(kubectl get pods -l app=spark-master -o jsonpath='{.items[0].metadata.name}')
MASTER_IP=$(kubectl get pod "$MASTER_POD" -o jsonpath='{.status.podIP}')
kubectl exec "$MASTER_POD" -it -- \
spark-shell --conf "spark.driver.bindAddress=$MASTER_IP" --conf "spark.driver.host=$MASTER_IP"
# Run jar on Spark Kubernetes
# Submits the packaged application as a Kubernetes Job via the driver manifest.
# NOTE(review): presumably the jar is baked into the image built earlier — verify against spark-driver-job.yaml
kubectl apply -f ./kubernetes/spark-driver-job.yaml
- Stop Spark on Kubernetes
# Tear down the ingress and the Spark deployment, then stop the cluster.
kubectl delete -f ./kubernetes/minikube-ingress.yaml
sh stop_spark_kubernetes.sh
# Remove the /etc/hosts entries added at startup so they don't
# accumulate across start/stop cycles (-i.bak keeps a backup; works on
# both GNU and BSD sed).
sudo sed -i.bak -e '/spark-master-ui/d' -e '/spark-history-ui/d' /etc/hosts
minikube stop