fix: remove optional for reduction_server_replica_count, add comment for _SPEC_ORDERS
googleapis · Oct 18, 2021 · 9dac2a9 · 9dac2a9
1 parent ce20743
commit 9dac2a9
Show file tree

Hide file tree

Showing 3 changed files with 19 additions and 15 deletions.
diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py
@@ -1061,7 +1061,7 @@ def from_local_script(
         accelerator_count: int = 0,
         boot_disk_type: str = "pd-ssd",
         boot_disk_size_gb: int = 100,
-        reduction_server_replica_count: Optional[int] = 0,
+        reduction_server_replica_count: int = 0,
         reduction_server_machine_type: Optional[str] = None,
         reduction_server_container_uri: Optional[str] = None,
         base_output_dir: Optional[str] = None,
@@ -1131,7 +1131,7 @@ def from_local_script(
                 Optional. Size in GB of the boot disk, default is 100GB.
                 boot disk size must be within the range of [100, 64000].
             reduction_server_replica_count (int):
-                Optional. The number of reduction server replicas.
+                The number of reduction server replicas, default is 0.
             reduction_server_machine_type (str):
                 Optional. The type of machine to use for reduction server.
             reduction_server_container_uri (str):
@@ -1209,7 +1209,7 @@ def from_local_script(
                 continue
 
             if (
-                spec_order == worker_spec_utils.SPEC_ORDERS["server_spec"]
+                spec_order == worker_spec_utils._SPEC_ORDERS["server_spec"]
                 and reduction_server_replica_count > 0
             ):
                 spec["container_spec"] = {

diff --git a/google/cloud/aiplatform/training_jobs.py b/google/cloud/aiplatform/training_jobs.py
@@ -1278,7 +1278,7 @@ def _prepare_and_validate_run(
         accelerator_count: int = 0,
         boot_disk_type: str = "pd-ssd",
         boot_disk_size_gb: int = 100,
-        reduction_server_replica_count: Optional[int] = 0,
+        reduction_server_replica_count: int = 0,
         reduction_server_machine_type: Optional[str] = None,
     ) -> Tuple[worker_spec_utils._DistributedTrainingSpec, Optional[gca_model.Model]]:
         """Create worker pool specs and managed model as well validating the
@@ -1321,7 +1321,7 @@ def _prepare_and_validate_run(
                 Size in GB of the boot disk, default is 100GB.
                 boot disk size must be within the range of [100, 64000].
             reduction_server_replica_count (int):
-                Optional. The number of reduction server replicas.
+                The number of reduction server replicas, default is 0.
             reduction_server_machine_type (str):
                 Optional. The type of machine to use for reduction server.
         Returns:
@@ -1744,7 +1744,7 @@ def run(
         accelerator_count: int = 0,
         boot_disk_type: str = "pd-ssd",
         boot_disk_size_gb: int = 100,
-        reduction_server_replica_count: Optional[int] = 0,
+        reduction_server_replica_count: int = 0,
         reduction_server_machine_type: Optional[str] = None,
         reduction_server_container_uri: Optional[str] = None,
         training_fraction_split: Optional[float] = None,
@@ -1919,7 +1919,7 @@ def run(
                 Size in GB of the boot disk, default is 100GB.
                 boot disk size must be within the range of [100, 64000].
             reduction_server_replica_count (int):
-                Optional. The number of reduction server replicas.
+                The number of reduction server replicas, default is 0.
             reduction_server_machine_type (str):
                 Optional. The type of machine to use for reduction server.
             reduction_server_container_uri (str):
@@ -2229,7 +2229,7 @@ def _run(
                 continue
 
             if (
-                spec_order == worker_spec_utils.SPEC_ORDERS["server_spec"]
+                spec_order == worker_spec_utils._SPEC_ORDERS["server_spec"]
                 and reduction_server_container_uri
             ):
                 spec["container_spec"] = {
@@ -2536,7 +2536,7 @@ def run(
         accelerator_count: int = 0,
         boot_disk_type: str = "pd-ssd",
         boot_disk_size_gb: int = 100,
-        reduction_server_replica_count: Optional[int] = 0,
+        reduction_server_replica_count: int = 0,
         reduction_server_machine_type: Optional[str] = None,
         reduction_server_container_uri: Optional[str] = None,
         training_fraction_split: Optional[float] = None,
@@ -2704,7 +2704,7 @@ def run(
                 Size in GB of the boot disk, default is 100GB.
                 boot disk size must be within the range of [100, 64000].
             reduction_server_replica_count (int):
-                Optional. The number of reduction server replicas.
+                The number of reduction server replicas, default is 0.
             reduction_server_machine_type (str):
                 Optional. The type of machine to use for reduction server.
             reduction_server_container_uri (str):
@@ -3003,7 +3003,7 @@ def _run(
                 continue
 
             if (
-                spec_order == worker_spec_utils.SPEC_ORDERS["server_spec"]
+                spec_order == worker_spec_utils._SPEC_ORDERS["server_spec"]
                 and reduction_server_container_uri
             ):
                 spec["container_spec"] = {
@@ -5299,7 +5299,7 @@ def run(
         accelerator_count: int = 0,
         boot_disk_type: str = "pd-ssd",
         boot_disk_size_gb: int = 100,
-        reduction_server_replica_count: Optional[int] = 0,
+        reduction_server_replica_count: int = 0,
         reduction_server_machine_type: Optional[str] = None,
         reduction_server_container_uri: Optional[str] = None,
         training_fraction_split: Optional[float] = None,
@@ -5467,7 +5467,7 @@ def run(
                 Size in GB of the boot disk, default is 100GB.
                 boot disk size must be within the range of [100, 64000].
             reduction_server_replica_count (int):
-                Optional. The number of reduction server replicas.
+                The number of reduction server replicas, default is 0.
             reduction_server_machine_type (str):
                 Optional. The type of machine to use for reduction server.
             reduction_server_container_uri (str):
@@ -5747,7 +5747,7 @@ def _run(
                 continue
 
             if (
-                spec_order == worker_spec_utils.SPEC_ORDERS["server_spec"]
+                spec_order == worker_spec_utils._SPEC_ORDERS["server_spec"]
                 and reduction_server_container_uri
             ):
                 spec["container_spec"] = {

diff --git a/google/cloud/aiplatform/utils/worker_spec_utils.py b/google/cloud/aiplatform/utils/worker_spec_utils.py
@@ -21,7 +21,11 @@
     accelerator_type as gca_accelerator_type_compat,
 )
 
-SPEC_ORDERS = {
+# `_SPEC_ORDERS` maps each worker pool spec type to its order in the `_WorkerPoolSpec`.
+# The `server_spec` supports either a reduction server or a parameter server, each
+# with a different configuration for its `container_spec`. This mapping is
+# used during configuration of `container_spec` for all worker pool specs.
+_SPEC_ORDERS = {
     "chief_spec": 0,
     "worker_spec": 1,
     "server_spec": 2,