name: Spark Periodic Integration Tests
on:
  schedule:
    - cron: "0 21 * * 4" # Runs every Thursday at 9PM UTC (1PM PST)
  workflow_dispatch:
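# This workflow publishes timestamped dev packages to Test PyPI, then runs the
# SDK integration test suite against Databricks and Spark-on-EMR compute.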
jobs:
  publish-pypi:
    # Similar to the release scripts, but publishes to Test PyPI:
    #   rm -rf dist && rm -rf build
    #   python3 -m build && twine check dist/*
    #   twine upload --repository testpypi dist/*
    #   pip3 install -i https://test.pypi.org/simple/ aqueduct-ml
    name: Publish Test PyPI Packages
    runs-on: [ubuntu-latest]
    timeout-minutes: 20
    outputs:
      version: ${{ steps.inject_version.outputs.version }}
    permissions:
      id-token: write
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.9"
      - name: install missing dependencies
        run: pip install build twine
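      # A unique 0.1.dev<timestamp> version is written into both package version
      # files, presumably so every run can upload a fresh release to Test PyPI
      # without colliding with a previously published version.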
      - name: inject dev version
        id: inject_version
        run: |
          export VERSION=0.1.dev$(date +%s)
          rm sdk/version
          echo $VERSION >> sdk/version
          rm src/python/version
          echo $VERSION >> src/python/version
          echo version=$VERSION >> $GITHUB_OUTPUT
      - name: build sdk
        working-directory: sdk
        run: |
          rm -rf dist
          rm -rf build
          python3 -m build
          twine check dist/*
      - name: publish sdk
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          repository-url: https://test.pypi.org/legacy/
          packages-dir: sdk/dist
          password: ${{ secrets.PYPI_API_TOKEN_SDK }}
      - name: build executor
        working-directory: src/python
        run: |
          rm -rf dist
          rm -rf build
          python3 -m build
          twine check dist/*
      - name: publish executor
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          repository-url: https://test.pypi.org/legacy/
          packages-dir: src/python/dist
          password: ${{ secrets.PYPI_API_TOKEN_EXECUTOR }}
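  # Runs the SDK test suite against Databricks compute, installing the server
  # with the dev package version published above (via the publish-pypi output).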
  run-databricks-tests:
    runs-on: ubuntu-latest
    timeout-minutes: 480
    needs: publish-pypi
    name: SDK Integration Tests against Databricks Compute
    steps:
      - uses: actions/checkout@v2
      - uses: ./.github/actions/setup-server
        timeout-minutes: 7
        with:
          python-pkg-version: ${{ needs.publish-pypi.outputs.version }}
      # TODO(ENG-2537): Use our separate GH actions credentials.
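      # fetch-test-config presumably pulls the Databricks-specific test
      # credentials/config file from S3 for the integration test suite.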
      - uses: ./.github/actions/fetch-test-config
        with:
          aws_access_key_id: ${{ secrets.KENNY_AWS_ACCESS_KEY_ID }}
          aws_secret_access_key: ${{ secrets.KENNY_AWS_SECRET_ACCESS_KEY }}
          s3_test_config_path: periodic-databricks-test-config.yml
      - name: Install any data connector packages
        run: |
          aqueduct install s3
          aqueduct install snowflake
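      # -rP reports captured output for passing tests; -n 1 keeps pytest-xdist
      # on a single worker, presumably to limit concurrent load on Databricks.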
      - name: Run the SDK Integration Tests
        working-directory: integration_tests/sdk
        run: pytest aqueduct_tests/ -rP -vv -n 1
      - uses: ./.github/actions/upload-artifacts
        if: always()
        with:
          prefix: Databricks Compute
      # Exports the current oncall's Slack ID as an environment variable for the
      # notification step below.
      - name: Get the Slack ID for the current oncall
        if: always()
        run: |
          aws s3 cp s3://aqueduct-assets/oncall.yml ./oncall.yml
          echo "ONCALL_SLACK_MEMBER_ID=$(python3 scripts/get_current_oncall.py --file ./oncall.yml)" >> $GITHUB_ENV
      - name: Report to Slack on Failure
        if: always()
        uses: ravsamhq/notify-slack-action@v1
        with:
          status: ${{ job.status }}
          notification_title: ""
          message_format: "{emoji} *{workflow}* has {status_message}"
          footer: "{run_url}"
          notify_when: "failure,warnings"
          mention_users: ${{ env.ONCALL_SLACK_MEMBER_ID }}
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.ACTION_MONITORING_SLACK }}
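  # Provisions a transient EMR cluster, points the SDK tests at its Livy
  # endpoint, and always tears the cluster down afterward.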
  run-spark-tests:
    runs-on: ubuntu-latest
    timeout-minutes: 480
    needs: publish-pypi
    name: SDK Integration Tests against Spark EMR
    steps:
      - uses: actions/checkout@v2
      - uses: ./.github/actions/setup-server
        timeout-minutes: 7
        with:
          python-pkg-version: ${{ needs.publish-pypi.outputs.version }}
      # TODO(ENG-2537): Use our separate GH actions credentials.
      - uses: ./.github/actions/fetch-test-config
        with:
          aws_access_key_id: ${{ secrets.HARI_ACCESS_KEY_ID }}
          aws_secret_access_key: ${{ secrets.HARI_SECRET_ACCESS_KEY }}
          s3_test_config_path: periodic-spark-test-config.yml
      - name: Install any data connector packages
        run: |
          aqueduct install s3
          aqueduct install snowflake
      # TODO(ENG-2537): Use our separate GH actions credentials.
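      # The cluster runs EMR 6.9.0 in us-east-2 with one primary, one core, and
      # two task m5.2xlarge nodes; Livy is included so Spark jobs can be
      # submitted over its REST API.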
      - name: Spin up EMR cluster
        run: |
          cluster_id=$(aws emr create-cluster \
            --name "aqueduct_test" \
            --release-label "emr-6.9.0" \
            --service-role "${{ secrets.HARI_EMR_DEFAULT_ROLE }}" \
            --ec2-attributes '{"InstanceProfile":"EMR_EC2_DefaultRole","EmrManagedMasterSecurityGroup":"${{ secrets.HARI_EMR_MAIN_SECURITY_GROUP }}","EmrManagedSlaveSecurityGroup":"${{ secrets.HARI_EMR_WORKER_SECURITY_GROUP }}","KeyName":"${{ secrets.HARI_AWS_KEYPAIR }}","AdditionalMasterSecurityGroups":[],"AdditionalSlaveSecurityGroups":[],"SubnetId":"${{ secrets.HARI_EMR_SUBNET_ID }}"}' \
            --applications Name=Hadoop Name=Hive Name=Livy Name=Spark Name=Zeppelin \
            --instance-groups '[{"InstanceCount":1,"InstanceGroupType":"MASTER","Name":"Primary","InstanceType":"m5.2xlarge","EbsConfiguration":{"EbsBlockDeviceConfigs":[{"VolumeSpecification":{"VolumeType":"gp2","SizeInGB":32},"VolumesPerInstance":4}]}},{"InstanceCount":1,"InstanceGroupType":"CORE","Name":"Core","InstanceType":"m5.2xlarge","EbsConfiguration":{"EbsBlockDeviceConfigs":[{"VolumeSpecification":{"VolumeType":"gp2","SizeInGB":32},"VolumesPerInstance":4}]}},{"InstanceCount":2,"InstanceGroupType":"TASK","Name":"Task - 1","InstanceType":"m5.2xlarge","EbsConfiguration":{"EbsBlockDeviceConfigs":[{"VolumeSpecification":{"VolumeType":"gp2","SizeInGB":32},"VolumesPerInstance":4}]}}]' \
            --scale-down-behavior "TERMINATE_AT_TASK_COMPLETION" \
            --os-release-label "2.0.20221210.1" \
            --region "us-east-2" \
            --query 'ClusterId' \
            --output text)
          echo $cluster_id
          echo "cluster_id=$cluster_id" >> $GITHUB_ENV
      - name: Wait for EMR cluster to be ready
        run: |
          aws emr wait cluster-running --cluster-id "${{ env.cluster_id }}"
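      # Livy listens on its default port 8998 on the primary (master) node; the
      # SDK tests talk to Spark through this endpoint.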
      - name: Get Livy Server URL
        id: get-livy-server-url
        run: |
          livy_server_url=$(aws emr describe-cluster --cluster-id ${{ env.cluster_id }} --query 'Cluster.MasterPublicDnsName' --output text):8998
          echo "livy_server_url=http://$livy_server_url" >> $GITHUB_ENV
      - name: Update the test-credentials file with the appropriate livy server url
        working-directory: integration_tests/sdk
        run: sed -i "s#\(livy_server_url:\s*\).*#\1$(echo "${{ env.livy_server_url }}")#" test-credentials.yml
      - name: Run the SDK Integration Tests
        working-directory: integration_tests/sdk
        run: pytest aqueduct_tests/ -rP -vv -n 2
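      # if: always() ensures the EMR cluster is terminated even when the tests
      # fail or an earlier step errors out, so clusters are not left running.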
      - name: Tear Down EMR cluster
        id: tear-down-emr-cluster
        if: always()
        run: |
          aws emr terminate-clusters --cluster-ids ${{ env.cluster_id }}
      - uses: ./.github/actions/upload-artifacts
        if: always()
        with:
          prefix: Spark Compute
      # Exports the current oncall's Slack ID as an environment variable for the
      # notification step below.
      - name: Get the Slack ID for the current oncall
        if: always()
        run: |
          aws s3 cp s3://aqueduct-assets/oncall.yml ./oncall.yml
          echo "ONCALL_SLACK_MEMBER_ID=$(python3 scripts/get_current_oncall.py --file ./oncall.yml)" >> $GITHUB_ENV
      - name: Report to Slack on Failure
        if: always()
        uses: ravsamhq/notify-slack-action@v1
        with:
          status: ${{ job.status }}
          notification_title: ""
          message_format: "{emoji} *{workflow}* has {status_message}"
          footer: "{run_url}"
          notify_when: "failure,warnings"
          mention_users: ${{ env.ONCALL_SLACK_MEMBER_ID }}
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.ACTION_MONITORING_SLACK }}