Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Jazibhumayun/wt ci notification #1317

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
116 changes: 116 additions & 0 deletions dataeng/jobs/analytics/WarehouseTransformsCItest.groovy
@@ -0,0 +1,116 @@
package analytics
import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm
import static org.edx.jenkins.dsl.AnalyticsConstants.common_log_rotator
import static org.edx.jenkins.dsl.AnalyticsConstants.common_wrappers
import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers
import static org.edx.jenkins.dsl.AnalyticsConstants.common_triggers
import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm_parameters
import static org.edx.jenkins.dsl.AnalyticsConstants.common_authorization
import static org.edx.jenkins.dsl.JenkinsPublicConstants.GHPRB_CANCEL_BUILDS_ON_UPDATE

/**
 * Job DSL definition of the "warehouse-transforms-ci-test" Jenkins job.
 *
 * The job is triggered from pull requests via the GitHub Pull Request Builder
 * (GHPRB) trigger, checks out warehouse-transforms at the PR commit together
 * with analytics-tools and jenkins-job-dsl, and then runs
 * dataeng/resources/warehouse-transforms-citest.sh.
 */
class WarehouseTransformsCItest{
// Closure invoked with the Job DSL factory and the map of job variables (allVars).
public static def job = { dslFactory, allVars ->
dslFactory.job("warehouse-transforms-ci-test"){
authorization common_authorization(allVars)
logRotator common_log_rotator(allVars)
// Parameters contributed by the shared helper (presumably the secure-repo settings — confirm in AnalyticsConstants).
parameters secure_scm_parameters(allVars)
// Job-specific string parameters; every default is read from allVars.
parameters {
stringParam('WAREHOUSE_TRANSFORMS_URL', allVars.get('WAREHOUSE_TRANSFORMS_URL'), 'URL for the warehouse-transforms repository.')
stringParam('WAREHOUSE_TRANSFORMS_BRANCH', allVars.get('WAREHOUSE_TRANSFORMS_BRANCH'), 'Branch of warehouse-transforms repository to use.')
stringParam('PROJECT_URL', allVars.get('PROJECT_URL'), 'Github Project URL necessary to give when using GHPRB plugin.')
stringParam('DBT_TARGET', allVars.get('DBT_TARGET'), 'DBT target from profiles.yml in analytics-secure.')
stringParam('DBT_PROFILE', allVars.get('DBT_PROFILE'), 'DBT profile from profiles.yml in analytics-secure.')
stringParam('DBT_PROJECT_PATH', allVars.get('DBT_PROJECT_PATH'), 'Path in warehouse-transforms to use as the dbt project, relative to "projects" (usually automated/applications or reporting).')
stringParam('DBT_RUN_OPTIONS', allVars.get('DBT_RUN_OPTIONS'), 'Additional options to dbt run, such as --models for model selection. Details here: https://docs.getdbt.com/docs/model-selection-syntax')
stringParam('DBT_RUN_EXCLUDE', allVars.get('DBT_RUN_EXCLUDE'), 'Additional options to dbt run, such as --exclude. Details here: https://docs.getdbt.com/docs/model-selection-syntax')
stringParam('DBT_TEST_OPTIONS', allVars.get('DBT_TEST_OPTIONS'), 'Additional options to dbt test, such as --models for model selection. Details here: https://docs.getdbt.com/docs/model-selection-syntax')
stringParam('DBT_TEST_EXCLUDE', allVars.get('DBT_TEST_EXCLUDE'), 'Additional options to dbt test, such as --exclude. Details here: https://docs.getdbt.com/docs/model-selection-syntax')
stringParam('ANALYTICS_TOOLS_URL', allVars.get('ANALYTICS_TOOLS_URL'), 'URL for the analytics tools repo.')
stringParam('ANALYTICS_TOOLS_BRANCH', allVars.get('ANALYTICS_TOOLS_BRANCH'), 'Branch of analytics tools repo to use.')
stringParam('JENKINS_JOB_DSL_URL', allVars.get('JENKINS_JOB_DSL_URL'), 'URL for the jenkins-job-dsl repo.')
stringParam('JENKINS_JOB_DSL_BRANCH', allVars.get('JENKINS_JOB_DSL_BRANCH'), 'Branch of jenkins-job-dsl repo to use.')
stringParam('DB_NAME', allVars.get('DB_NAME'), 'Database name used to create output schema of dbt run/tests')
// NOTIFY falls back to the literal '$PAGER_NOTIFY' when allVars has no NOTIFY entry.
stringParam('NOTIFY', allVars.get('NOTIFY','$PAGER_NOTIFY'), 'Space separated list of emails to send notifications to.')
}
// Values exported to the build environment; the CI shell script passes them to the
// create/remove CI schema scripts.
environmentVariables {
env('KEY_PATH', allVars.get('KEY_PATH'))
env('PASSPHRASE_PATH', allVars.get('PASSPHRASE_PATH'))
env('USER', allVars.get('USER'))
env('ACCOUNT', allVars.get('ACCOUNT'))
}
scm {
github('edx/warehouse-transforms')
}
// Repositories checked out in addition to whatever secure_scm(allVars) contributes.
multiscm secure_scm(allVars) << {
// warehouse-transforms: fetch pull-request refs and build the exact PR commit.
git {
remote {
url('$WAREHOUSE_TRANSFORMS_URL')
refspec('+refs/pull/*:refs/remotes/origin/pr/*')
credentials('1')
}
// '\$' keeps ${ghprbActualCommit} literal so it is expanded at build time, not by Groovy.
branches('\${ghprbActualCommit}')
extensions {
relativeTargetDirectory('warehouse-transforms')
pruneBranches()
cleanAfterCheckout()
}
}
// analytics-tools: checked out into the analytics-tools subdirectory of the workspace.
git {
remote {
url('$ANALYTICS_TOOLS_URL')
branch('$ANALYTICS_TOOLS_BRANCH')
credentials('1')
}
extensions {
relativeTargetDirectory('analytics-tools')
pruneBranches()
cleanAfterCheckout()
}
}
// jenkins-job-dsl: checked out into the jenkins-job-dsl subdirectory of the workspace.
git {
remote {
url('$JENKINS_JOB_DSL_URL')
branch('$JENKINS_JOB_DSL_BRANCH')
credentials('1')
}
extensions {
relativeTargetDirectory('jenkins-job-dsl')
pruneBranches()
cleanAfterCheckout()
}
}
}
triggers {
githubPullRequest {
// Since the server running this job will not be publicly available,
// we cannot rely on GitHub to deliver webhooks. Instead, poll GitHub
// every 3 minutes for updates on any branch.
cron('H/3 * * * *')
triggerPhrase('jenkins run dbt') // Comment this phrase on a pull request to trigger this job.
onlyTriggerPhrase(true) // true: the job only fires on the trigger-phrase comment, not on new commits.
orgWhitelist(['edx-ops', 'edX']) // Any GitHub user in these orgs may trigger this job from a PR; the job is used by many edXers, so the whole edX org is allowed.
}
}
// NOTE(review): presumably controls whether running builds are cancelled when the PR is updated — confirm in JenkinsPublicConstants.
configure GHPRB_CANCEL_BUILDS_ON_UPDATE(false)

wrappers {
colorizeOutput('xterm')
}
wrappers common_wrappers(allVars)
steps {
// The build itself is a single shell step whose script is read from the seed workspace.
shell(dslFactory.readFileFromWorkspace('dataeng/resources/warehouse-transforms-citest.sh'))
// NOTE(review): disabled virtualenv-builder variant of the step, kept for reference.
// virtualenv {
// pythonName('PYTHON_3.7')
// nature("shell")
// systemSitePackages(false)
// command(
// dslFactory.readFileFromWorkspace("dataeng/resources/warehouse-transforms-ci.sh")
// )
// }
}
publishers common_publishers(allVars)
}
}
}


2 changes: 2 additions & 0 deletions dataeng/jobs/createJobsNew.groovy
Expand Up @@ -6,6 +6,7 @@ import static analytics.TerminateCluster.job as TerminateClusterJob
import static analytics.UpdateUsers.job as UpdateUsersJob
import static analytics.SnowflakeSchemaBuilder.job as SnowflakeSchemaBuilderJob
import static analytics.WarehouseTransformsCI.job as WarehouseTransformsCIJob
import static analytics.WarehouseTransformsCItest.job as WarehouseTransformsCIJobtest
import static org.edx.jenkins.dsl.JenkinsPublicConstants.DEFAULT_VIEW
import org.yaml.snakeyaml.Yaml
import org.yaml.snakeyaml.error.YAMLException
Expand Down Expand Up @@ -36,6 +37,7 @@ def taskMap = [
UPDATE_USERS_JOB: UpdateUsersJob,
SNOWFLAKE_SCHEMA_BUILDER_JOB: SnowflakeSchemaBuilderJob,
WAREHOUSE_TRANSFORMS_CI_JOB: WarehouseTransformsCIJob,
WAREHOUSE_TRANSFORMS_CI_JOB_TEST: WarehouseTransformsCIJobtest,
]

for (task in taskMap) {
Expand Down
147 changes: 147 additions & 0 deletions dataeng/resources/warehouse-transforms-citest.sh
@@ -0,0 +1,147 @@
#!/usr/bin/env bash
# CI driver for warehouse-transforms pull requests.
#
# This first part prepares the Python environment, downloads the production dbt
# manifest, rebases the PR commit onto the latest master and works out which dbt
# projects the PR touches. It sets isReporting / isApplications / isRawToSource /
# isTelemetry to "true" or "false" for the per-project sections that follow.
#
# -e: abort on the first failing command; -x: echo every command to the build log.
set -ex

PLATFORM_VENV="platform_venv"
virtualenv --python=python3.7 --clear "${PLATFORM_VENV}"
source "${PLATFORM_VENV}/bin/activate"


# Install the requirements of the analytics-tools snowflake scripts
# (used later to create and remove the per-PR CI schema).
cd "$WORKSPACE/analytics-tools/snowflake"
make requirements

# Create a directory for the prod build manifest.json and download it from S3.
cd "$WORKSPACE/" && mkdir -p manifest

pip install awscli

aws s3 cp s3://edx-dbt-docs/manifest.json "${WORKSPACE}/manifest"

# Setup to run dbt commands
cd "$WORKSPACE/warehouse-transforms"

# Bring origin/master into the local branch "latest", which is the baseline for the diff.
# NOTE(review): `git pull` also merges into the current HEAD; `git fetch` may be what is intended — confirm.
git pull -f origin master:latest

# Put HEAD back at the PR commit.
git checkout "${ghprbActualCommit}"

git rebase latest

# List the files that differ from master once, print them to the build log,
# and reuse the list for every project check below.
changed_files=$(git diff latest --name-only)
printf '%s\n' "$changed_files"

# Find which project(s) the changed files belong to. The list holds full paths,
# so a match on the project directory is enough. One PR may touch several projects.
if grep -Eq "projects/reporting" <<< "$changed_files"; then isReporting="true"; else isReporting="false"; fi
if grep -Eq "projects/automated/applications" <<< "$changed_files"; then isApplications="true"; else isApplications="false"; fi
if grep -Eq "projects/automated/raw_to_source" <<< "$changed_files"; then isRawToSource="true"; else isRawToSource="false"; fi
if grep -Eq "projects/automated/telemetry" <<< "$changed_files"; then isTelemetry="true"; else isTelemetry="false"; fi


# Install the dbt version pinned by warehouse-transforms (still in its checkout directory).
pip install -r requirements.txt

# Run dbt for the "reporting" project when the PR changes files under projects/reporting.
if [ "$isReporting" == "true" ]
then

    # Override NOTIFY with the email of the PR commit author.
    NOTIFY=${ghprbActualCommitAuthorEmail}

    cd "$WORKSPACE/analytics-tools/snowflake"

    # The schema name is the GitHub pull request ID prefixed with 'PR_' and
    # suffixed with the project name, e.g. PR_1724_reporting.
    export CI_SCHEMA_NAME="PR_${ghprbPullId}_reporting"
    # The schema is created dynamically for each PR.
    # profiles.yml contains the schema name used for the output models when dbt runs.
    python create_ci_schema.py --key_path "$KEY_PATH" --passphrase_path "$PASSPHRASE_PATH" --automation_user "$USER" --account "$ACCOUNT" --db_name "$DB_NAME" --schema_name "$CI_SCHEMA_NAME"

    DBT_PROJECT_PATH='reporting'
    # Slim CI syntax: "run" only modified models and their downstream models.
    DBT_RUN_OPTIONS="-m state:modified+ @state:modified,1+test_type:data --defer --state $WORKSPACE/manifest"
    DBT_RUN_EXCLUDE='' ## TODO Add excluded models here
    # Will add --defer here when DBT version is upgraded
    # Slim CI syntax: "test" only modified models and their downstream models.
    DBT_TEST_OPTIONS="-m state:modified+ --state $WORKSPACE/manifest"
    DBT_TEST_EXCLUDE='--exclude test_name:relationships'

    # The sourced script reads the DBT_* variables set above.
    source "$WORKSPACE/jenkins-job-dsl/dataeng/resources/warehouse-transforms-ci-dbt.sh"

    # NOTE(review): the script runs under `set -e`, so a failure in the sourced dbt
    # script skips this cleanup and leaves the CI schema behind — confirm that is intended.
    cd "$WORKSPACE/analytics-tools/snowflake"
    python remove_ci_schema.py --key_path "$KEY_PATH" --passphrase_path "$PASSPHRASE_PATH" --automation_user "$USER" --account "$ACCOUNT" --db_name "$DB_NAME" --schema_name "$CI_SCHEMA_NAME"

fi


# Run dbt for the "automated/applications" project when the PR changes files under it.
# NOTE(review): unlike the other project sections, NOTIFY is not reassigned here, so it
# keeps whatever value an earlier section left in it — confirm that is intended.
if [ "$isApplications" == "true" ]
then

    cd "$WORKSPACE/analytics-tools/snowflake"
    # Per-PR schema name, e.g. PR_1724_applications.
    export CI_SCHEMA_NAME="PR_${ghprbPullId}_applications"
    python create_ci_schema.py --key_path "$KEY_PATH" --passphrase_path "$PASSPHRASE_PATH" --automation_user "$USER" --account "$ACCOUNT" --db_name "$DB_NAME" --schema_name "$CI_SCHEMA_NAME"

    # No extra selection or exclusion options for this project.
    DBT_PROJECT_PATH='automated/applications'
    DBT_RUN_OPTIONS=''
    DBT_RUN_EXCLUDE=''
    DBT_TEST_OPTIONS=''
    DBT_TEST_EXCLUDE=''

    source "$WORKSPACE/jenkins-job-dsl/dataeng/resources/warehouse-transforms-ci-dbt.sh"

    # NOTE(review): the script runs under `set -e`, so a failure in the sourced dbt
    # script skips this cleanup and leaves the CI schema behind — confirm that is intended.
    cd "$WORKSPACE/analytics-tools/snowflake"
    python remove_ci_schema.py --key_path "$KEY_PATH" --passphrase_path "$PASSPHRASE_PATH" --automation_user "$USER" --account "$ACCOUNT" --db_name "$DB_NAME" --schema_name "$CI_SCHEMA_NAME"

fi

# Run dbt for the "automated/raw_to_source" project when the PR changes files under it.
if [ "$isRawToSource" == "true" ]
then

    # Override NOTIFY with the email of the PR commit author.
    NOTIFY=${ghprbActualCommitAuthorEmail}

    cd "$WORKSPACE/analytics-tools/snowflake"
    # Per-PR schema name, e.g. PR_1724_raw_to_source.
    export CI_SCHEMA_NAME="PR_${ghprbPullId}_raw_to_source"
    python create_ci_schema.py --key_path "$KEY_PATH" --passphrase_path "$PASSPHRASE_PATH" --automation_user "$USER" --account "$ACCOUNT" --db_name "$DB_NAME" --schema_name "$CI_SCHEMA_NAME"

    # No extra selection or exclusion options for this project.
    DBT_PROJECT_PATH='automated/raw_to_source'
    DBT_RUN_OPTIONS=''
    DBT_RUN_EXCLUDE=''
    DBT_TEST_OPTIONS=''
    DBT_TEST_EXCLUDE=''

    source "$WORKSPACE/jenkins-job-dsl/dataeng/resources/warehouse-transforms-ci-dbt.sh"

    # NOTE(review): the script runs under `set -e`, so a failure in the sourced dbt
    # script skips this cleanup and leaves the CI schema behind — confirm that is intended.
    cd "$WORKSPACE/analytics-tools/snowflake"
    python remove_ci_schema.py --key_path "$KEY_PATH" --passphrase_path "$PASSPHRASE_PATH" --automation_user "$USER" --account "$ACCOUNT" --db_name "$DB_NAME" --schema_name "$CI_SCHEMA_NAME"

fi


# Run dbt for the "automated/telemetry" project when the PR changes files under it.
if [ "$isTelemetry" == "true" ]
then

    # Override NOTIFY with the email of the PR commit author.
    NOTIFY=${ghprbActualCommitAuthorEmail}

    cd "$WORKSPACE/analytics-tools/snowflake"
    # Per-PR schema name, e.g. PR_1724_telemetry.
    export CI_SCHEMA_NAME="PR_${ghprbPullId}_telemetry"
    python create_ci_schema.py --key_path "$KEY_PATH" --passphrase_path "$PASSPHRASE_PATH" --automation_user "$USER" --account "$ACCOUNT" --db_name "$DB_NAME" --schema_name "$CI_SCHEMA_NAME"

    # No extra selection or exclusion options for this project.
    DBT_PROJECT_PATH='automated/telemetry'
    DBT_RUN_OPTIONS=''
    DBT_RUN_EXCLUDE=''
    DBT_TEST_OPTIONS=''
    DBT_TEST_EXCLUDE=''

    source "$WORKSPACE/jenkins-job-dsl/dataeng/resources/warehouse-transforms-ci-dbt.sh"

    # NOTE(review): the script runs under `set -e`, so a failure in the sourced dbt
    # script skips this cleanup and leaves the CI schema behind — confirm that is intended.
    cd "$WORKSPACE/analytics-tools/snowflake"
    python remove_ci_schema.py --key_path "$KEY_PATH" --passphrase_path "$PASSPHRASE_PATH" --automation_user "$USER" --account "$ACCOUNT" --db_name "$DB_NAME" --schema_name "$CI_SCHEMA_NAME"

fi