diff --git a/dags/solutions_team/configs/tensorflow/common.py b/dags/solutions_team/configs/tensorflow/common.py index 53b4da79..3995aa52 100644 --- a/dags/solutions_team/configs/tensorflow/common.py +++ b/dags/solutions_team/configs/tensorflow/common.py @@ -14,8 +14,6 @@ """Utilities to construct common configs.""" -from typing import Tuple - # Keras API AAA_CONNECTION = "aaa_connection" @@ -60,7 +58,7 @@ ) -def set_up_se_nightly() -> Tuple[str]: +def set_up_se_nightly() -> tuple[str]: """Adjust grpc_tpu_worker for SE tests""" return ( "sudo sed -i 's/TF_DOCKER_URL=.*/TF_DOCKER_URL=gcr.io\/cloud-tpu-v2-images-dev\/grpc_tpu_worker:nightly\"/' /etc/systemd/system/tpu-runtime.service", @@ -69,7 +67,7 @@ def set_up_se_nightly() -> Tuple[str]: ) -def install_tf_nightly() -> Tuple[str]: +def install_tf_nightly() -> tuple[str]: """Install tf nightly + libtpu.""" return ( "pip install tensorflow-text-nightly", @@ -79,17 +77,7 @@ def install_tf_nightly() -> Tuple[str]: ) -def install_tf_2_16() -> Tuple[str]: - """Install tf 2.16 + libtpu.""" - return ( - "pip install tensorflow-text-nightly", - "sudo gsutil -m cp gs://cloud-tpu-v2-images-dev-artifacts/tensorflow/2.16/2024-02-20/t*.whl /tmp/ && pip install /tmp/t*.whl --force", - "sudo gsutil -m cp gs://cloud-tpu-v2-images-dev-artifacts/libtpu/1.10.0/rc0/libtpu.so /lib/", - CMD_PRINT_TF_VERSION, - ) - - -def set_up_tensorflow_keras() -> Tuple[str]: +def set_up_tensorflow_keras() -> tuple[str]: """Common set up for tensorflow Keras tests.""" return ( CMD_INSTALL_KERAS_NIGHTLY, @@ -98,16 +86,7 @@ def set_up_tensorflow_keras() -> Tuple[str]: ) -def set_up_tensorflow_2_16_keras() -> Tuple[str]: - """Common set up for tensorflow Keras 2.16 tests.""" - return ( - "pip install --upgrade --force-reinstall tf-keras==2.16.0rc0", - "export PATH=$PATH:/root/google-cloud-sdk/bin && cd /tmp && sudo gcloud source repos clone tf2-api-tests --project=cloud-ml-auto-solutions", - "cd /tmp/tf2-api-tests && pip install behave matplotlib", - ) - - -def set_up_google_tensorflow_models() -> Tuple[str]: +def set_up_google_tensorflow_models() -> tuple[str]: """Common set up for tensorflow models.""" return ( 'if [ ! -d "/usr/share/tpu/models" ]; then sudo mkdir -p /usr/share/tpu && cd /usr/share/tpu && git clone https://github.com/tensorflow/models.git; fi', @@ -115,14 +94,3 @@ def set_up_google_tensorflow_models() -> Tuple[str]: "pip install tensorflow-recommenders --no-deps", CMD_INSTALL_KERAS_NIGHTLY, ) - - -def set_up_google_tensorflow_2_16_models() -> Tuple[str]: - """Common set up for tensorflow models.""" - return ( - "sudo mkdir -p /usr/share/tpu && cd /usr/share/tpu", - "sudo git clone -b r2.16.0 https://github.com/tensorflow/models.git", - "pip install -r /usr/share/tpu/models/official/requirements.txt", - "pip install tensorflow-recommenders --no-deps", - "pip install --upgrade --force-reinstall tf-keras==2.16.0rc0", - ) diff --git a/dags/solutions_team/configs/tensorflow/solutionsteam_tf_2_16_supported_config.py b/dags/solutions_team/configs/tensorflow/solutionsteam_tf_release_supported_config.py similarity index 84% rename from dags/solutions_team/configs/tensorflow/solutionsteam_tf_2_16_supported_config.py rename to dags/solutions_team/configs/tensorflow/solutionsteam_tf_release_supported_config.py index 3936cff7..207ca9ff 100644 --- a/dags/solutions_team/configs/tensorflow/solutionsteam_tf_2_16_supported_config.py +++ b/dags/solutions_team/configs/tensorflow/solutionsteam_tf_release_supported_config.py @@ -19,7 +19,44 @@ from dags.solutions_team.configs.tensorflow import common from airflow.models import Variable from dags.vm_resource import TpuVersion, Project, RuntimeVersion -from typing import List + + +MAJOR_VERSION = "2" +MINOR_VERSION = "16" +PATCH_VERSION = "1" +LIBTPU_VERSION = "1.10.0" +KERAS_VERSION = "2.16.0rc0" +MODELS_BRANCH = "r2.16.0" + + +def install_tf() -> tuple[str]: + """Install the release version of tf + libtpu.""" + return ( + "pip install tensorflow-text-nightly", + f"sudo gsutil -m cp gs://cloud-tpu-v2-images-dev-artifacts/tensorflow/{MAJOR_VERSION}.{MINOR_VERSION}/2024-02-20/t*.whl /tmp/ && pip install /tmp/t*.whl --force", + f"sudo gsutil -m cp gs://cloud-tpu-v2-images-dev-artifacts/libtpu/{LIBTPU_VERSION}/rc0/libtpu.so /lib/", + common.CMD_PRINT_TF_VERSION, + ) + + +def set_up_keras() -> tuple[str]: + """Common set up for tensorflow Keras tests for the release.""" + return ( + f"pip install --upgrade --force-reinstall tf-keras=={KERAS_VERSION}", + "export PATH=$PATH:/root/google-cloud-sdk/bin && cd /tmp && sudo gcloud source repos clone tf2-api-tests --project=cloud-ml-auto-solutions", + "cd /tmp/tf2-api-tests && pip install behave matplotlib", + ) + + +def set_up_tensorflow_models() -> tuple[str]: + """Common set up for tensorflow models for the release.""" + return ( + "sudo mkdir -p /usr/share/tpu && cd /usr/share/tpu", + f"sudo git clone -b {MODELS_BRANCH} https://github.com/tensorflow/models.git", + "pip install -r /usr/share/tpu/models/official/requirements.txt", + "pip install tensorflow-recommenders --no-deps", + f"pip install --upgrade --force-reinstall tf-keras=={KERAS_VERSION}", + ) def get_tf_keras_config( @@ -42,10 +79,10 @@ def get_tf_keras_config( dataset_name=metric_config.DatasetOption.XLML_DATASET, ) - set_up_cmds = common.install_tf_2_16() + common.set_up_tensorflow_2_16_keras() + set_up_cmds = install_tf() + set_up_keras() if not is_pjrt and is_pod: set_up_cmds += common.set_up_se_nightly() - keras_test_name = f"tf_2_16_keras_api_{test_name}" + keras_test_name = f"tf_{MAJOR_VERSION}_{MINOR_VERSION}_keras_api_{test_name}" benchmark_id = f"{keras_test_name}-v{tpu_version.value}-{tpu_cores}" # Add default_var to pass DAG check # TODO(ranran): replace Variable.get() to XCOM when it applies @@ -110,9 +147,7 @@ def get_tf_resnet_config( dataset_name=metric_config.DatasetOption.XLML_DATASET, ) - set_up_cmds = ( - common.install_tf_2_16() + common.set_up_google_tensorflow_2_16_models() - ) + set_up_cmds = install_tf() + set_up_tensorflow_models() if not is_pjrt and is_pod: set_up_cmds += common.set_up_se_nightly() @@ -136,7 +171,7 @@ def get_tf_resnet_config( }, } - test_name = "tf_2_16_resnet_imagenet" + test_name = f"tf_{MAJOR_VERSION}_{MINOR_VERSION}_resnet_imagenet" benchmark_id = f"{test_name}-v{tpu_version.value}-{tpu_cores}" # Add default_var to pass DAG check # TODO(ranran): replace Variable.get() to XCOM when it applies @@ -183,7 +218,7 @@ def get_tf_dlrm_config( tpu_cores: int, tpu_zone: str, time_out_in_min: int, - bottom_mlp: List[int], + bottom_mlp: list[int], embedding_dim: int, train_steps: int, runtime_version: str, @@ -201,9 +236,7 @@ def get_tf_dlrm_config( dataset_name=metric_config.DatasetOption.XLML_DATASET, ) - set_up_cmds = ( - common.install_tf_2_16() + common.set_up_google_tensorflow_2_16_models() - ) + set_up_cmds = install_tf() + set_up_tensorflow_models() if not is_pjrt and is_pod: set_up_cmds += common.set_up_se_nightly() @@ -273,7 +306,7 @@ def get_tf_dlrm_config( }, } - test_name = "tf_2_16_dlrm_criteo" + test_name = f"tf_{MAJOR_VERSION}_{MINOR_VERSION}_dlrm_criteo" benchmark_id = f"{test_name}-v{tpu_version.value}-{tpu_cores}" # Add default_var to pass DAG check # TODO(ranran): replace Variable.get() to XCOM when it applies diff --git a/dags/solutions_team/solutionsteam_tf_2_16_se_supported.py b/dags/solutions_team/solutionsteam_tf_release_se_supported.py similarity index 95% rename from dags/solutions_team/solutionsteam_tf_2_16_se_supported.py rename to dags/solutions_team/solutionsteam_tf_release_se_supported.py index d0e6ce0b..9a8361b4 100644 --- a/dags/solutions_team/solutionsteam_tf_2_16_se_supported.py +++ b/dags/solutions_team/solutionsteam_tf_release_se_supported.py @@ -18,17 +18,19 @@ from airflow import models from dags import composer_env from dags.vm_resource import TpuVersion, Zone, RuntimeVersion, V5_NETWORKS, V5E_SUBNETWORKS, V5P_SUBNETWORKS -from dags.solutions_team.configs.tensorflow import solutionsteam_tf_2_16_supported_config as tf_config +from dags.solutions_team.configs.tensorflow import solutionsteam_tf_release_supported_config as tf_config from dags.solutions_team.configs.tensorflow import common # Release tests only need to run once, they can be run manually as needed SCHEDULED_TIME = None +VERSION = f"{tf_config.MAJOR_VERSION}.{tf_config.MINOR_VERSION}" + with models.DAG( - dag_id="tf_2_16_se_nightly_supported", + dag_id=f"tf_{tf_config.MAJOR_VERSION}_{tf_config.MINOR_VERSION}_se_nightly_supported", schedule=SCHEDULED_TIME, - tags=["solutions_team", "tf", "se", "2.16", "supported", "xlml"], + tags=["solutions_team", "tf", "se", VERSION, "supported", "xlml"], start_date=datetime.datetime(2024, 1, 4), catchup=False, ) as dag: diff --git a/dags/solutions_team/solutionsteam_tf_2_16_supported.py b/dags/solutions_team/solutionsteam_tf_release_supported.py similarity index 95% rename from dags/solutions_team/solutionsteam_tf_2_16_supported.py rename to dags/solutions_team/solutionsteam_tf_release_supported.py index 79198884..b05aeab8 100644 --- a/dags/solutions_team/solutionsteam_tf_2_16_supported.py +++ b/dags/solutions_team/solutionsteam_tf_release_supported.py @@ -18,18 +18,19 @@ from airflow import models from dags import composer_env from dags.vm_resource import TpuVersion, Project, Zone, RuntimeVersion, V5_NETWORKS, V5E_SUBNETWORKS, V5P_SUBNETWORKS -from dags.solutions_team.configs.tensorflow import solutionsteam_tf_2_16_supported_config as tf_config +from dags.solutions_team.configs.tensorflow import solutionsteam_tf_release_supported_config as tf_config from dags.solutions_team.configs.tensorflow import common # Release tests only need to run once, they can be run manually as needed SCHEDULED_TIME = None +VERSION = f"{tf_config.MAJOR_VERSION}.{tf_config.MINOR_VERSION}" with models.DAG( - dag_id="tf_2_16_nightly_supported", + dag_id=f"tf_{tf_config.MAJOR_VERSION}_{tf_config.MINOR_VERSION}_nightly_supported", schedule=SCHEDULED_TIME, - tags=["solutions_team", "tf", "2.16", "supported", "xlml"], + tags=["solutions_team", "tf", VERSION, "supported", "xlml"], start_date=datetime.datetime(2023, 8, 16), catchup=False, ) as dag: