name: Spark Periodic Integration Tests
on:
  schedule:
    - cron: "0 21 * * 4" # GitHub Actions cron is UTC: runs every Thursday at 21:00 UTC (1PM PST).
  workflow_dispatch:
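  # workflow_dispatch additionally lets the suite be triggered manually from the Actions tab.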
jobs:
  publish-pypi:
    # Similar to the release scripts, but publishes to Test PyPI:
    #   rm -rf dist && rm -rf build
    #   python3 -m build && twine check dist/*
    #   twine upload --repository testpypi dist/*
    #   pip3 install -i https://test.pypi.org/simple/ aqueduct-ml
    name: Publish Test PyPI Packages
    runs-on: ubuntu-latest
    timeout-minutes: 20
    outputs:
      version: ${{ steps.inject_version.outputs.version }}
    permissions:
      id-token: write
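      # id-token: write enables OIDC "trusted publishing" on PyPI; note that the publish
      # steps below still authenticate with classic API tokens, so this may be belt-and-braces.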
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.9"
      - name: install missing dependencies
        run: pip install build twine
      - name: inject dev version
        id: inject_version
        run: |
          export VERSION=0.1.dev$(date +%s)
          echo $VERSION > sdk/version
          echo $VERSION > src/python/version
          echo version=$VERSION >> $GITHUB_OUTPUT
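          # The epoch-seconds suffix (e.g. 0.1.dev1694476800) keeps each run's version unique,
          # so repeated uploads to Test PyPI never collide with an existing release.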
      - name: build sdk
        working-directory: sdk
        run: |
          rm -rf dist
          rm -rf build
          python3 -m build
          twine check dist/*
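          # python3 -m build produces both an sdist and a wheel under dist/;
          # twine check validates their metadata before anything is uploaded.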
      - name: publish sdk
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          repository-url: https://test.pypi.org/legacy/
          packages-dir: sdk/dist
          password: ${{ secrets.PYPI_API_TOKEN_SDK }}
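      # https://test.pypi.org/legacy/ is Test PyPI's upload endpoint; the published packages
      # are then installable from the https://test.pypi.org/simple/ index.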
      - name: build executor
        working-directory: src/python
        run: |
          rm -rf dist
          rm -rf build
          python3 -m build
          twine check dist/*
      - name: publish executor
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          repository-url: https://test.pypi.org/legacy/
          packages-dir: src/python/dist
          password: ${{ secrets.PYPI_API_TOKEN_EXECUTOR }}
  run-databricks-tests:
    runs-on: ubuntu-latest
    timeout-minutes: 480
    needs: publish-pypi
    name: SDK Integration Tests against Databricks Compute
    steps:
      - uses: actions/checkout@v2
      - uses: ./.github/actions/setup-server
        timeout-minutes: 7
        with:
          python-pkg-version: ${{ needs.publish-pypi.outputs.version }}
      # TODO(ENG-2537): Use our separate GH actions credentials.
      - uses: ./.github/actions/fetch-test-config
        with:
          aws_access_key_id: ${{ secrets.KENNY_AWS_ACCESS_KEY_ID }}
          aws_secret_access_key: ${{ secrets.KENNY_AWS_SECRET_ACCESS_KEY }}
          s3_test_config_path: periodic-databricks-test-config.yml
      - name: Install any data connector packages
        run: |
          aqueduct install s3
          aqueduct install snowflake
      - name: Run the SDK Integration Tests
        working-directory: integration_tests/sdk
        run: pytest aqueduct_tests/ -rP -vv -n 1
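      # -n is the pytest-xdist worker count: this job runs the suite serially (-n 1), presumably
      # to avoid contention on the shared Databricks cluster; the EMR job below uses -n 2.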
      - uses: ./.github/actions/upload-artifacts
        if: always()
        with:
          prefix: Databricks Compute
      # Sets the oncall's Slack ID as an environment variable for the Slack step below.
      - name: Get the Slack ID for the current oncall
        if: always()
        run: |
          aws s3 cp s3://aqueduct-assets/oncall.yml ./oncall.yml
          echo "ONCALL_SLACK_MEMBER_ID=$(python3 scripts/get_current_oncall.py --file ./oncall.yml)" >> $GITHUB_ENV
      - name: Report to Slack on Failure
        if: always()
        uses: ravsamhq/notify-slack-action@v1
        with:
          status: ${{ job.status }}
          notification_title: ""
          message_format: "{emoji} *{workflow}* has {status_message}"
          footer: "{run_url}"
          notify_when: "failure,warnings"
          mention_users: ${{ env.ONCALL_SLACK_MEMBER_ID }}
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.ACTION_MONITORING_SLACK }}
  run-spark-tests:
    runs-on: ubuntu-latest
    timeout-minutes: 480
    needs: publish-pypi
    name: SDK Integration Tests against Spark EMR
    steps:
      - uses: actions/checkout@v2
      - uses: ./.github/actions/setup-server
        timeout-minutes: 7
        with:
          python-pkg-version: ${{ needs.publish-pypi.outputs.version }}
      # TODO(ENG-2537): Use our separate GH actions credentials.
      - uses: ./.github/actions/fetch-test-config
        with:
          aws_access_key_id: ${{ secrets.HARI_ACCESS_KEY_ID }}
          aws_secret_access_key: ${{ secrets.HARI_SECRET_ACCESS_KEY }}
          s3_test_config_path: periodic-spark-test-config.yml
      - name: Install any data connector packages
        run: |
          aqueduct install s3
          aqueduct install snowflake
      # TODO(ENG-2537): Use our separate GH actions credentials.
      - name: Spin up EMR cluster
        run: |
          cluster_id=$(aws emr create-cluster \
            --name "aqueduct_test" \
            --release-label "emr-6.9.0" \
            --service-role "${{ secrets.HARI_EMR_DEFAULT_ROLE }}" \
            --ec2-attributes '{"InstanceProfile":"EMR_EC2_DefaultRole","EmrManagedMasterSecurityGroup":"${{ secrets.HARI_EMR_MAIN_SECURITY_GROUP }}","EmrManagedSlaveSecurityGroup":"${{ secrets.HARI_EMR_WORKER_SECURITY_GROUP }}","KeyName":"${{ secrets.HARI_AWS_KEYPAIR }}","AdditionalMasterSecurityGroups":[],"AdditionalSlaveSecurityGroups":[],"SubnetId":"${{ secrets.HARI_EMR_SUBNET_ID }}"}' \
            --applications Name=Hadoop Name=Hive Name=Livy Name=Spark Name=Zeppelin \
            --instance-groups '[{"InstanceCount":1,"InstanceGroupType":"MASTER","Name":"Primary","InstanceType":"m5.2xlarge","EbsConfiguration":{"EbsBlockDeviceConfigs":[{"VolumeSpecification":{"VolumeType":"gp2","SizeInGB":32},"VolumesPerInstance":4}]}},{"InstanceCount":1,"InstanceGroupType":"CORE","Name":"Core","InstanceType":"m5.2xlarge","EbsConfiguration":{"EbsBlockDeviceConfigs":[{"VolumeSpecification":{"VolumeType":"gp2","SizeInGB":32},"VolumesPerInstance":4}]}},{"InstanceCount":2,"InstanceGroupType":"TASK","Name":"Task - 1","InstanceType":"m5.2xlarge","EbsConfiguration":{"EbsBlockDeviceConfigs":[{"VolumeSpecification":{"VolumeType":"gp2","SizeInGB":32},"VolumesPerInstance":4}]}}]' \
            --scale-down-behavior "TERMINATE_AT_TASK_COMPLETION" \
            --os-release-label "2.0.20221210.1" \
            --region "us-east-2" \
            --query 'ClusterId' \
            --output text)
          echo $cluster_id
          echo "cluster_id=$cluster_id" >> $GITHUB_ENV
      - name: Wait for EMR cluster to be ready
        run: |
          aws emr wait cluster-running --cluster-id "${{ env.cluster_id }}"
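          # The cluster-running waiter polls the cluster state and returns once it is up
          # and running, failing the step if the cluster terminates or the waiter times out.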
      - name: Get Livy Server URL
        id: get-livy-server-url
        run: |
          livy_server_url=$(aws emr describe-cluster --cluster-id ${{ env.cluster_id }} --query 'Cluster.MasterPublicDnsName' --output text):8998
          echo "livy_server_url=http://$livy_server_url" >> $GITHUB_ENV
      - name: Update the test-credentials file with the appropriate livy server url
        working-directory: integration_tests/sdk
        run: sed -i "s#\(livy_server_url:\s*\).*#\1${{ env.livy_server_url }}#" test-credentials.yml
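      # The sed rewrites whatever follows the livy_server_url: key in test-credentials.yml,
      # e.g. turning `livy_server_url: <placeholder>` into
      # `livy_server_url: http://ec2-x-x-x-x.us-east-2.compute.amazonaws.com:8998`
      # (the placeholder value and hostname here are illustrative).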
      - name: Run the SDK Integration Tests
        working-directory: integration_tests/sdk
        run: pytest aqueduct_tests/ -rP -vv -n 2
      - name: Tear Down EMR cluster
        id: tear-down-emr-cluster
        if: always()
        run: |
          aws emr terminate-clusters --cluster-ids ${{ env.cluster_id }}
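          # The if: always() on this step guarantees teardown runs even when the tests fail,
          # so a broken run never leaks a live (and billable) EMR cluster.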
      - uses: ./.github/actions/upload-artifacts
        if: always()
        with:
          prefix: Spark Compute
      # Sets the oncall's Slack ID as an environment variable for the Slack step below.
      - name: Get the Slack ID for the current oncall
        if: always()
        run: |
          aws s3 cp s3://aqueduct-assets/oncall.yml ./oncall.yml
          echo "ONCALL_SLACK_MEMBER_ID=$(python3 scripts/get_current_oncall.py --file ./oncall.yml)" >> $GITHUB_ENV
      - name: Report to Slack on Failure
        if: always()
        uses: ravsamhq/notify-slack-action@v1
        with:
          status: ${{ job.status }}
          notification_title: ""
          message_format: "{emoji} *{workflow}* has {status_message}"
          footer: "{run_url}"
          notify_when: "failure,warnings"
          mention_users: ${{ env.ONCALL_SLACK_MEMBER_ID }}
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.ACTION_MONITORING_SLACK }}