Skip to content

Commit

Permalink
Refactored 2.16 release into a more reusable template and updated to …
Browse files Browse the repository at this point in the history
…2.15.1
  • Loading branch information
EricLeFort committed Mar 20, 2024
1 parent b3a8354 commit 6047b27
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 60 deletions.
102 changes: 65 additions & 37 deletions dags/solutions_team/configs/tensorflow/common.py
Expand Up @@ -14,7 +14,7 @@

"""Utilities to construct common configs."""

from typing import Tuple
from __future__ import annotations


# Keras API
Expand Down Expand Up @@ -60,7 +60,7 @@
)


def set_up_se_nightly() -> Tuple[str]:
def set_up_se_nightly() -> tuple[str]:
"""Adjust grpc_tpu_worker for SE tests"""
return (
"sudo sed -i 's/TF_DOCKER_URL=.*/TF_DOCKER_URL=gcr.io\/cloud-tpu-v2-images-dev\/grpc_tpu_worker:nightly\"/' /etc/systemd/system/tpu-runtime.service",
Expand All @@ -69,60 +69,88 @@ def set_up_se_nightly() -> Tuple[str]:
)


def install_tf_nightly() -> Tuple[str]:
"""Install tf nightly + libtpu."""
def install_tf(
    major: int | str | None = None,
    minor: int | str | None = None,
    patch: int | str | None = None,
    libtpu_version: str | None = None,
) -> tuple[str, ...]:
    """Build the shell commands that install tf + libtpu.

    If the version numbers are set, installs that version. Otherwise installs
    the nightly build. Either all of the version numbers need to be set or
    none of them should be set.

    Args:
      major: The major version number.
      minor: The minor version number.
      patch: The patch version number.
      libtpu_version: The libtpu version to install. When omitted, the
        top-level "latest" libtpu artifact is used.

    Returns:
      The install commands, in the order they should be run.

    Raises:
      ValueError: If only some of major/minor/patch are specified.
    """
    # A tuple (not a set) keeps one entry per argument even when values repeat.
    is_set = [part is not None for part in (major, minor, patch)]
    if any(is_set) and not all(is_set):
        # Raise instead of assert: asserts are stripped when Python runs with -O.
        raise ValueError(
            "All parts of a version should be specified if any of them are"
        )

    gs_version_str = f"tf-{major}-{minor}-{patch}" if all(is_set) else "tf-nightly"

    libtpu_version_str = "latest"
    if libtpu_version is not None:
        libtpu_version_str = f"{libtpu_version}/latest"

    return (
        "pip install tensorflow-text-nightly",
        f"sudo gsutil -m cp gs://cloud-tpu-v2-images-dev-artifacts/tensorflow/{gs_version_str}/latest/t*.whl /tmp/ && pip install /tmp/t*.whl --force",
        f"sudo gsutil -m cp gs://cloud-tpu-v2-images-dev-artifacts/libtpu/{libtpu_version_str}/libtpu.so /lib/",
        CMD_PRINT_TF_VERSION,
    )


def install_tf_2_16() -> Tuple[str]:
"""Install tf 2.16 + libtpu."""
return (
"pip install tensorflow-text-nightly",
"sudo gsutil -m cp gs://cloud-tpu-v2-images-dev-artifacts/tensorflow/2.16/2024-02-20/t*.whl /tmp/ && pip install /tmp/t*.whl --force",
"sudo gsutil -m cp gs://cloud-tpu-v2-images-dev-artifacts/libtpu/1.10.0/rc0/libtpu.so /lib/",
CMD_PRINT_TF_VERSION,
)
def set_up_keras(version: str | None = None) -> tuple[str, ...]:
    """Build the common set up commands for tensorflow Keras tests.

    If a version is not set, defaults to the nightly Keras install command.

    Args:
      version: The tf-keras version to install, e.g. "2.15.1".

    Returns:
      The set up commands, in the order they should be run.
    """
    if version is None:
        cmd_install_keras = CMD_INSTALL_KERAS_NIGHTLY
    else:
        cmd_install_keras = (
            f"pip install --upgrade --force-reinstall --no-deps tf-keras=={version}"
        )

    return (
        cmd_install_keras,
        "export PATH=$PATH:/root/google-cloud-sdk/bin && cd /tmp && sudo gcloud source repos clone tf2-api-tests --project=cloud-ml-auto-solutions",
        "cd /tmp/tf2-api-tests && pip install behave matplotlib",
    )


def set_up_tensorflow_2_16_keras() -> Tuple[str]:
"""Common set up for tensorflow Keras 2.16 tests."""
return (
"pip install --upgrade --force-reinstall tf-keras==2.16.0rc0",
"export PATH=$PATH:/root/google-cloud-sdk/bin && cd /tmp && sudo gcloud source repos clone tf2-api-tests --project=cloud-ml-auto-solutions",
"cd /tmp/tf2-api-tests && pip install behave matplotlib",
)
def set_up_tensorflow_models(
    models_branch: str | None = None,
    keras_version: str | None = None,
) -> tuple[str, ...]:
    """Build the common set up commands for tensorflow models.

    If any versions are not set, defaults to nightly: the "master" branch of
    the models repository and the nightly Keras install command.

    Args:
      models_branch: The tensorflow/models branch to clone, e.g. "r2.15.0".
      keras_version: The tf-keras version to install, e.g. "2.15.1".

    Returns:
      The set up commands, in the order they should be run.
    """
    if models_branch is None:
        models_branch = "master"

    if keras_version is None:
        cmd_install_keras = CMD_INSTALL_KERAS_NIGHTLY
    else:
        cmd_install_keras = f"pip install --upgrade --force-reinstall --no-deps tf-keras=={keras_version}"

    return (
        # The clone is skipped when the models directory already exists.
        f'if [ ! -d "/usr/share/tpu/models" ]; then sudo mkdir -p /usr/share/tpu && cd /usr/share/tpu && sudo git clone -b {models_branch} https://github.com/tensorflow/models.git; fi',
        "pip install -r /usr/share/tpu/models/official/requirements.txt",
        "pip install tensorflow-recommenders --no-deps",
        cmd_install_keras,
    )
Expand Up @@ -42,7 +42,7 @@ def get_tf_keras_config(
dataset_name=metric_config.DatasetOption.XLML_DATASET,
)

set_up_cmds = common.install_tf_nightly() + common.set_up_tensorflow_keras()
set_up_cmds = common.set_up_keras() + common.install_tf()
if not is_pjrt and is_pod:
set_up_cmds += common.set_up_se_nightly()
keras_test_name = f"tf_keras_api_{test_name}"
Expand Down Expand Up @@ -110,9 +110,7 @@ def get_tf_resnet_config(
dataset_name=metric_config.DatasetOption.XLML_DATASET,
)

set_up_cmds = (
common.install_tf_nightly() + common.set_up_google_tensorflow_models()
)
set_up_cmds = common.set_up_tensorflow_models() + common.install_tf()
if not is_pjrt and is_pod:
set_up_cmds += common.set_up_se_nightly()

Expand Down Expand Up @@ -201,9 +199,7 @@ def get_tf_dlrm_config(
dataset_name=metric_config.DatasetOption.XLML_DATASET,
)

set_up_cmds = (
common.install_tf_nightly() + common.set_up_google_tensorflow_models()
)
set_up_cmds = common.set_up_tensorflow_models() + common.install_tf()
if not is_pjrt and is_pod:
set_up_cmds += common.set_up_se_nightly()

Expand Down
Expand Up @@ -14,12 +14,23 @@

"""Utilities to construct configs for solutionsteam_tf_nightly_supported DAG."""

from __future__ import annotations

from xlml.apis import gcp_config, metric_config, task, test_config
from dags import gcs_bucket, test_owner
from dags.solutions_team.configs.tensorflow import common
from airflow.models import Variable
from dags.vm_resource import TpuVersion, Project, RuntimeVersion
from typing import List


MAJOR_VERSION = "2"
MINOR_VERSION = "15"
PATCH_VERSION = "1"
LIBTPU_VERSION = "1.9.0"
KERAS_VERSION = "2.15.1"
MODELS_BRANCH = "r2.15.0"

GS_VERSION_STR = f"tf-{MAJOR_VERSION}-{MINOR_VERSION}-{PATCH_VERSION}"


def get_tf_keras_config(
Expand All @@ -42,10 +53,13 @@ def get_tf_keras_config(
dataset_name=metric_config.DatasetOption.XLML_DATASET,
)

set_up_cmds = common.install_tf_2_16() + common.set_up_tensorflow_2_16_keras()
set_up_cmds = common.set_up_keras(KERAS_VERSION)
set_up_cmds += common.install_tf(
MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION, LIBTPU_VERSION
)
if not is_pjrt and is_pod:
set_up_cmds += common.set_up_se_nightly()
keras_test_name = f"tf_2_16_keras_api_{test_name}"
keras_test_name = f"tf_{MAJOR_VERSION}_{MINOR_VERSION}_keras_api_{test_name}"
benchmark_id = f"{keras_test_name}-v{tpu_version.value}-{tpu_cores}"
# Add default_var to pass DAG check
# TODO(ranran): replace Variable.get() with XCom when it applies
Expand Down Expand Up @@ -110,8 +124,9 @@ def get_tf_resnet_config(
dataset_name=metric_config.DatasetOption.XLML_DATASET,
)

set_up_cmds = (
common.install_tf_2_16() + common.set_up_google_tensorflow_2_16_models()
set_up_cmds = common.set_up_tensorflow_models(MODELS_BRANCH, KERAS_VERSION)
set_up_cmds += common.install_tf(
MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION, LIBTPU_VERSION
)
if not is_pjrt and is_pod:
set_up_cmds += common.set_up_se_nightly()
Expand All @@ -136,7 +151,7 @@ def get_tf_resnet_config(
},
}

test_name = "tf_2_16_resnet_imagenet"
test_name = f"tf_{MAJOR_VERSION}_{MINOR_VERSION}_resnet_imagenet"
benchmark_id = f"{test_name}-v{tpu_version.value}-{tpu_cores}"
# Add default_var to pass DAG check
# TODO(ranran): replace Variable.get() with XCom when it applies
Expand Down Expand Up @@ -183,7 +198,7 @@ def get_tf_dlrm_config(
tpu_cores: int,
tpu_zone: str,
time_out_in_min: int,
bottom_mlp: List[int],
bottom_mlp: list[int],
embedding_dim: int,
train_steps: int,
runtime_version: str,
Expand All @@ -201,8 +216,9 @@ def get_tf_dlrm_config(
dataset_name=metric_config.DatasetOption.XLML_DATASET,
)

set_up_cmds = (
common.install_tf_2_16() + common.set_up_google_tensorflow_2_16_models()
set_up_cmds = common.set_up_tensorflow_models(MODELS_BRANCH, KERAS_VERSION)
set_up_cmds += common.install_tf(
MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION, LIBTPU_VERSION
)
if not is_pjrt and is_pod:
set_up_cmds += common.set_up_se_nightly()
Expand Down Expand Up @@ -273,7 +289,7 @@ def get_tf_dlrm_config(
},
}

test_name = "tf_2_16_dlrm_criteo"
test_name = f"tf_{MAJOR_VERSION}_{MINOR_VERSION}_dlrm_criteo"
benchmark_id = f"{test_name}-v{tpu_version.value}-{tpu_cores}"
# Add default_var to pass DAG check
# TODO(ranran): replace Variable.get() with XCom when it applies
Expand Down
Expand Up @@ -18,17 +18,19 @@
from airflow import models
from dags import composer_env
from dags.vm_resource import TpuVersion, Zone, RuntimeVersion, V5_NETWORKS, V5E_SUBNETWORKS, V5P_SUBNETWORKS
from dags.solutions_team.configs.tensorflow import solutionsteam_tf_2_16_supported_config as tf_config
from dags.solutions_team.configs.tensorflow import solutionsteam_tf_release_supported_config as tf_config
from dags.solutions_team.configs.tensorflow import common


# Release tests only need to run once; they can be run manually as needed.
SCHEDULED_TIME = None
VERSION = f"{tf_config.MAJOR_VERSION}.{tf_config.MINOR_VERSION}"


with models.DAG(
dag_id="tf_2_16_se_nightly_supported",
dag_id=f"tf_{tf_config.MAJOR_VERSION}_{tf_config.MINOR_VERSION}_se_nightly_supported",
schedule=SCHEDULED_TIME,
tags=["solutions_team", "tf", "se", "2.16", "supported", "xlml"],
tags=["solutions_team", "tf", "se", VERSION, "supported", "xlml"],
start_date=datetime.datetime(2024, 1, 4),
catchup=False,
) as dag:
Expand Down
Expand Up @@ -18,18 +18,19 @@
from airflow import models
from dags import composer_env
from dags.vm_resource import TpuVersion, Project, Zone, RuntimeVersion, V5_NETWORKS, V5E_SUBNETWORKS, V5P_SUBNETWORKS
from dags.solutions_team.configs.tensorflow import solutionsteam_tf_2_16_supported_config as tf_config
from dags.solutions_team.configs.tensorflow import solutionsteam_tf_release_supported_config as tf_config
from dags.solutions_team.configs.tensorflow import common


# Release tests only need to run once; they can be run manually as needed.
SCHEDULED_TIME = None
VERSION = f"{tf_config.MAJOR_VERSION}.{tf_config.MINOR_VERSION}"


with models.DAG(
dag_id="tf_2_16_nightly_supported",
dag_id=f"tf_{tf_config.MAJOR_VERSION}_{tf_config.MINOR_VERSION}_nightly_supported",
schedule=SCHEDULED_TIME,
tags=["solutions_team", "tf", "2.16", "supported", "xlml"],
tags=["solutions_team", "tf", VERSION, "supported", "xlml"],
start_date=datetime.datetime(2023, 8, 16),
catchup=False,
) as dag:
Expand Down

0 comments on commit 6047b27

Please sign in to comment.