Skip to content

Commit

Permalink
chore: Adds node pool affinities for pipelines using KubernetesPodOpe…
Browse files Browse the repository at this point in the history
…rator (#180)

* added node pool affinity for covid19_italy pipelines

* chore: node pool affinity for `austin_bikeshare`

* chore: node pool affinities for `google_political_ads` pipelines

* chore: node pool affinities for `irs_990` pipelines

* chore: node pool affinities for `noaa.gsod_stations` and `noaa.lightning_strikes_by_year`

* docs: adds node pool affinity example for `KubernetesPodOperator`
  • Loading branch information
adlersantos committed Sep 16, 2021
1 parent 2ad5581 commit d7d1200
Show file tree
Hide file tree
Showing 53 changed files with 713 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,23 @@
task_id="austin_bikeshare_stations_transform_csv",
name="bikeshare_stations",
namespace="default",
affinity={
"nodeAffinity": {
"requiredDuringSchedulingIgnoredDuringExecution": {
"nodeSelectorTerms": [
{
"matchExpressions": [
{
"key": "cloud.google.com/gke-nodepool",
"operator": "In",
"values": ["pool-e2-standard-4"],
}
]
}
]
}
}
},
image_pull_policy="Always",
image="{{ var.json.austin_bikeshare.container_registry.run_csv_transform_kub }}",
env_vars={
Expand Down
10 changes: 10 additions & 0 deletions datasets/austin_bikeshare/bikeshare_stations/pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,16 @@ dag:
# The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. use the Cloud Composer environment's resources without starving other pipelines.
namespace: "default"

affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: cloud.google.com/gke-nodepool
operator: In
values:
- "pool-e2-standard-4"

image_pull_policy: "Always"

# Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag.
Expand Down
17 changes: 17 additions & 0 deletions datasets/covid19_italy/data_by_province/data_by_province_dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,23 @@
startup_timeout_seconds=600,
name="covid19_italy_data_by_province",
namespace="default",
affinity={
"nodeAffinity": {
"requiredDuringSchedulingIgnoredDuringExecution": {
"nodeSelectorTerms": [
{
"matchExpressions": [
{
"key": "cloud.google.com/gke-nodepool",
"operator": "In",
"values": ["pool-e2-standard-4"],
}
]
}
]
}
}
},
image_pull_policy="Always",
image="{{ var.json.covid19_italy.container_registry.run_csv_transform_kub }}",
env_vars={
Expand Down
10 changes: 10 additions & 0 deletions datasets/covid19_italy/data_by_province/pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,16 @@ dag:
# The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. use the Cloud Composer environment's resources without starving other pipelines.
namespace: "default"

affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: cloud.google.com/gke-nodepool
operator: In
values:
- "pool-e2-standard-4"

image_pull_policy: "Always"

# Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag.
Expand Down
17 changes: 17 additions & 0 deletions datasets/covid19_italy/data_by_region/data_by_region_dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,23 @@
startup_timeout_seconds=600,
name="covid19_italy_data_by_region",
namespace="default",
affinity={
"nodeAffinity": {
"requiredDuringSchedulingIgnoredDuringExecution": {
"nodeSelectorTerms": [
{
"matchExpressions": [
{
"key": "cloud.google.com/gke-nodepool",
"operator": "In",
"values": ["pool-e2-standard-4"],
}
]
}
]
}
}
},
image_pull_policy="Always",
image="{{ var.json.covid19_italy.container_registry.run_csv_transform_kub }}",
env_vars={
Expand Down
10 changes: 10 additions & 0 deletions datasets/covid19_italy/data_by_region/pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,16 @@ dag:
# The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. use the Cloud Composer environment's resources without starving other pipelines.
namespace: "default"

affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: cloud.google.com/gke-nodepool
operator: In
values:
- "pool-e2-standard-4"

image_pull_policy: "Always"

# Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag.
Expand Down
17 changes: 17 additions & 0 deletions datasets/covid19_italy/national_trends/national_trends_dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,23 @@
startup_timeout_seconds=600,
name="covid19_italy_national_trends",
namespace="default",
affinity={
"nodeAffinity": {
"requiredDuringSchedulingIgnoredDuringExecution": {
"nodeSelectorTerms": [
{
"matchExpressions": [
{
"key": "cloud.google.com/gke-nodepool",
"operator": "In",
"values": ["pool-e2-standard-4"],
}
]
}
]
}
}
},
image_pull_policy="Always",
image="{{ var.json.covid19_italy.container_registry.run_csv_transform_kub }}",
env_vars={
Expand Down
10 changes: 10 additions & 0 deletions datasets/covid19_italy/national_trends/pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,16 @@ dag:
# The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. use the Cloud Composer environment's resources without starving other pipelines.
namespace: "default"

affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: cloud.google.com/gke-nodepool
operator: In
values:
- "pool-e2-standard-4"

image_pull_policy: "Always"

# Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,23 @@
startup_timeout_seconds=600,
name="advertiser_declared_stats",
namespace="default",
affinity={
"nodeAffinity": {
"requiredDuringSchedulingIgnoredDuringExecution": {
"nodeSelectorTerms": [
{
"matchExpressions": [
{
"key": "cloud.google.com/gke-nodepool",
"operator": "In",
"values": ["pool-e2-standard-4"],
}
]
}
]
}
}
},
image_pull_policy="Always",
image="{{ var.json.google_political_ads.container_registry.run_csv_transform_kub }}",
env_vars={
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,16 @@ dag:
# The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. use the Cloud Composer environment's resources without starving other pipelines.
namespace: "default"

affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: cloud.google.com/gke-nodepool
operator: In
values:
- "pool-e2-standard-4"

image_pull_policy: "Always"

# Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,23 @@
startup_timeout_seconds=600,
name="advertiser_geo_spend",
namespace="default",
affinity={
"nodeAffinity": {
"requiredDuringSchedulingIgnoredDuringExecution": {
"nodeSelectorTerms": [
{
"matchExpressions": [
{
"key": "cloud.google.com/gke-nodepool",
"operator": "In",
"values": ["pool-e2-standard-4"],
}
]
}
]
}
}
},
image_pull_policy="Always",
image="{{ var.json.google_political_ads.container_registry.run_csv_transform_kub }}",
env_vars={
Expand Down
10 changes: 10 additions & 0 deletions datasets/google_political_ads/advertiser_geo_spend/pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,16 @@ dag:
# The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. use the Cloud Composer environment's resources without starving other pipelines.
namespace: "default"

affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: cloud.google.com/gke-nodepool
operator: In
values:
- "pool-e2-standard-4"

image_pull_policy: "Always"

# Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,23 @@
startup_timeout_seconds=600,
name="advertiser_stats",
namespace="default",
affinity={
"nodeAffinity": {
"requiredDuringSchedulingIgnoredDuringExecution": {
"nodeSelectorTerms": [
{
"matchExpressions": [
{
"key": "cloud.google.com/gke-nodepool",
"operator": "In",
"values": ["pool-e2-standard-4"],
}
]
}
]
}
}
},
image_pull_policy="Always",
image="{{ var.json.google_political_ads.container_registry.run_csv_transform_kub }}",
env_vars={
Expand Down
10 changes: 10 additions & 0 deletions datasets/google_political_ads/advertiser_stats/pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,16 @@ dag:
# The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. use the Cloud Composer environment's resources without starving other pipelines.
namespace: "default"

affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: cloud.google.com/gke-nodepool
operator: In
values:
- "pool-e2-standard-4"

image_pull_policy: "Always"

# Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,23 @@
startup_timeout_seconds=600,
name="advertiser_weekly_spend",
namespace="default",
affinity={
"nodeAffinity": {
"requiredDuringSchedulingIgnoredDuringExecution": {
"nodeSelectorTerms": [
{
"matchExpressions": [
{
"key": "cloud.google.com/gke-nodepool",
"operator": "In",
"values": ["pool-e2-standard-4"],
}
]
}
]
}
}
},
image_pull_policy="Always",
image="{{ var.json.google_political_ads.container_registry.run_csv_transform_kub }}",
env_vars={
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,16 @@ dag:
# The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. use the Cloud Composer environment's resources without starving other pipelines.
namespace: "default"

affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: cloud.google.com/gke-nodepool
operator: In
values:
- "pool-e2-standard-4"

image_pull_policy: "Always"

# Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,23 @@
startup_timeout_seconds=600,
name="campaign_targeting",
namespace="default",
affinity={
"nodeAffinity": {
"requiredDuringSchedulingIgnoredDuringExecution": {
"nodeSelectorTerms": [
{
"matchExpressions": [
{
"key": "cloud.google.com/gke-nodepool",
"operator": "In",
"values": ["pool-e2-standard-4"],
}
]
}
]
}
}
},
image_pull_policy="Always",
image="{{ var.json.google_political_ads.container_registry.run_csv_transform_kub }}",
env_vars={
Expand Down
10 changes: 10 additions & 0 deletions datasets/google_political_ads/campaign_targeting/pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,16 @@ dag:
# The namespace to run within Kubernetes. Always set its value to "default" because we follow the guideline that KubernetesPodOperator will only be used for very light workloads, i.e. use the Cloud Composer environment's resources without starving other pipelines.
namespace: "default"

affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: cloud.google.com/gke-nodepool
operator: In
values:
- "pool-e2-standard-4"

image_pull_policy: "Always"

# Docker images will be built and pushed to GCR by default whenever the `scripts/generate_dag.py` is run. To skip building and pushing images, use the optional `--skip-builds` flag.
Expand Down
17 changes: 17 additions & 0 deletions datasets/google_political_ads/creative_stats/creative_stats_dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,23 @@
startup_timeout_seconds=600,
name="creative_stats",
namespace="default",
affinity={
"nodeAffinity": {
"requiredDuringSchedulingIgnoredDuringExecution": {
"nodeSelectorTerms": [
{
"matchExpressions": [
{
"key": "cloud.google.com/gke-nodepool",
"operator": "In",
"values": ["pool-e2-standard-4"],
}
]
}
]
}
}
},
image_pull_policy="Always",
image="{{ var.json.google_political_ads.container_registry.run_csv_transform_kub }}",
env_vars={
Expand Down

0 comments on commit d7d1200

Please sign in to comment.