From 90a49279204f1b8aaa18f750a9515870cbdbffd1 Mon Sep 17 00:00:00 2001 From: Ashmeen Kaur <57195160+ashmeenkaur@users.noreply.github.com> Date: Thu, 21 Dec 2023 18:19:48 +0530 Subject: [PATCH] Fix tf model by upgrading driver version (#1557) * change dependencies to use CUDA 11.8 * temp commit * upgrade ubuntu image as old is deprecated * reduce sleep * upgrade driver version * revert to newer version * Revert "reduce sleep" This reverts commit 69828baff98e332c58b7901bb7285f8177f8a7d0. * Revert "temp commit" This reverts commit d1cce7fb6820b6e52704da7ada58cf94e6dafe5a. --- .../scripts/continuous_test/ml_tests/run_and_manage_test.sh | 2 +- .../scripts/ml_tests/tf/resnet/setup_host_and_run_model.sh | 2 +- .../scripts/ml_tests/tf/resnet/setup_scripts/setup_container.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/perfmetrics/scripts/continuous_test/ml_tests/run_and_manage_test.sh b/perfmetrics/scripts/continuous_test/ml_tests/run_and_manage_test.sh index 3c0b9b7be0..9a8c13a9f4 100755 --- a/perfmetrics/scripts/continuous_test/ml_tests/run_and_manage_test.sh +++ b/perfmetrics/scripts/continuous_test/ml_tests/run_and_manage_test.sh @@ -82,7 +82,7 @@ function delete_existing_vm_and_create_new () { --service-account=927584127901-compute@developer.gserviceaccount.com \ --scopes=https://www.googleapis.com/auth/cloud-platform \ --accelerator=$ACCELERATOR \ - --create-disk=auto-delete=yes,boot=yes,device-name=$VM_NAME,image=projects/ubuntu-os-cloud/global/images/ubuntu-2004-focal-v20230616,mode=rw,size=150,type=projects/$GCP_PROJECT/zones/$ZONE_NAME/diskTypes/pd-balanced \ + --create-disk=auto-delete=yes,boot=yes,device-name=$VM_NAME,image=projects/ubuntu-os-cloud/global/images/ubuntu-2004-focal-v20231213,mode=rw,size=150,type=projects/$GCP_PROJECT/zones/$ZONE_NAME/diskTypes/pd-balanced \ --no-shielded-secure-boot \ --shielded-vtpm \ --shielded-integrity-monitoring \ diff --git a/perfmetrics/scripts/ml_tests/tf/resnet/setup_host_and_run_model.sh b/perfmetrics/scripts/ml_tests/tf/resnet/setup_host_and_run_model.sh index cad74a8948..d24516c2b9 100755 --- a/perfmetrics/scripts/ml_tests/tf/resnet/setup_host_and_run_model.sh +++ b/perfmetrics/scripts/ml_tests/tf/resnet/setup_host_and_run_model.sh @@ -6,7 +6,7 @@ set -e cd "$HOME/github/gcsfuse/perfmetrics/scripts" echo "Setting up the machine with Docker and Nvidia Driver..." -DRIVER_VERSION="450.172.01" +DRIVER_VERSION="520.61.05" source ml_tests/setup_host.sh $DRIVER_VERSION cd "$HOME/github/gcsfuse/" diff --git a/perfmetrics/scripts/ml_tests/tf/resnet/setup_scripts/setup_container.sh b/perfmetrics/scripts/ml_tests/tf/resnet/setup_scripts/setup_container.sh index e3858901d4..dcb0450dcc 100755 --- a/perfmetrics/scripts/ml_tests/tf/resnet/setup_scripts/setup_container.sh +++ b/perfmetrics/scripts/ml_tests/tf/resnet/setup_scripts/setup_container.sh @@ -1,7 +1,7 @@ #!/bin/bash # Installs go1.21 on the container, builds gcsfuse using log_rotation file -# and installs tf-models-official v2.13.0, makes update to include clear_kernel_cache +# and installs tf-models-official v2.13.2, makes update to include clear_kernel_cache # and epochs functionality, and runs the model # Install go lang