
feat(v1beta2): remove DOCKER/FLINK from Component enum; add HBASE (#108)

Breaking change in v1beta2:

1. The `DOCKER` and `FLINK` values have been removed from the `Component` enum, and an `HBASE` value was added.

Other changes:

1. There is a new `temp_bucket` field in `ClusterConfig` (see the sketch after this list).
2. There is a new `preemptibility` field in `InstanceGroupConfig`.
3. The `project_id` field of `JobReference` is now optional instead of required.
4. There is a new `dag_timeout` field in `WorkflowTemplate`.
5. There are new `dag_timeout`, `dag_start_time`, and `dag_end_time` fields in `WorkflowMetadata`.
6. There are various updates to the doc comments.
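
A minimal sketch of items 1 and 4 against the regenerated `dataproc_v1beta2` surface; the bucket and template IDs below are placeholders, not values from this commit:

from google.cloud import dataproc_v1beta2
from google.protobuf import duration_pb2

config = dataproc_v1beta2.ClusterConfig(
    # Item 1: name your own temp bucket instead of the Dataproc-managed default.
    temp_bucket="my-temp-bucket",
)
template = dataproc_v1beta2.WorkflowTemplate(
    id="my-workflow",
    # Item 4: cancel the workflow DAG if it runs longer than 30 minutes.
    dag_timeout=duration_pb2.Duration(seconds=1800),
)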
yoshi-automation committed Dec 28, 2020
1 parent 8e96bdd commit ee093a8
Showing 15 changed files with 510 additions and 134 deletions.
4 changes: 2 additions & 2 deletions google/cloud/dataproc_v1/__init__.py
@@ -115,7 +115,6 @@
     "CancelJobRequest",
     "Cluster",
     "ClusterConfig",
-    "ClusterControllerClient",
     "ClusterMetrics",
     "ClusterOperation",
     "ClusterOperationMetadata",
@@ -192,6 +191,7 @@
     "WorkflowNode",
     "WorkflowTemplate",
     "WorkflowTemplatePlacement",
+    "YarnApplication",
     "WorkflowTemplateServiceClient",
-    "YarnApplication",
+    "ClusterControllerClient",
 )
4 changes: 2 additions & 2 deletions google/cloud/dataproc_v1beta2/__init__.py
@@ -149,7 +149,6 @@
     "InstantiateInlineWorkflowTemplateRequest",
     "InstantiateWorkflowTemplateRequest",
     "Job",
-    "JobControllerClient",
     "JobMetadata",
     "JobPlacement",
     "JobReference",
@@ -194,6 +193,7 @@
     "WorkflowNode",
     "WorkflowTemplate",
     "WorkflowTemplatePlacement",
+    "YarnApplication",
     "WorkflowTemplateServiceClient",
-    "YarnApplication",
+    "JobControllerClient",
 )
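
The `__init__.py` hunks above only reshuffle the export order; no name leaves the public surface. Both of these imports keep working exactly as before (a sanity check grounded in the `__all__` lists shown):

from google.cloud.dataproc_v1 import ClusterControllerClient, ClusterConfig
from google.cloud.dataproc_v1beta2 import JobControllerClient, YarnApplication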
64 changes: 42 additions & 22 deletions google/cloud/dataproc_v1beta2/proto/autoscaling_policies.proto
@@ -36,10 +36,12 @@ option (google.api.resource_definition) = {
 // Cloud Dataproc API.
 service AutoscalingPolicyService {
   option (google.api.default_host) = "dataproc.googleapis.com";
-  option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";
+  option (google.api.oauth_scopes) =
+      "https://www.googleapis.com/auth/cloud-platform";

   // Creates new autoscaling policy.
-  rpc CreateAutoscalingPolicy(CreateAutoscalingPolicyRequest) returns (AutoscalingPolicy) {
+  rpc CreateAutoscalingPolicy(CreateAutoscalingPolicyRequest)
+      returns (AutoscalingPolicy) {
     option (google.api.http) = {
       post: "/v1beta2/{parent=projects/*/locations/*}/autoscalingPolicies"
       body: "policy"
@@ -55,7 +57,8 @@ service AutoscalingPolicyService {
   //
   // Disabled check for update_mask, because all updates will be full
   // replacements.
-  rpc UpdateAutoscalingPolicy(UpdateAutoscalingPolicyRequest) returns (AutoscalingPolicy) {
+  rpc UpdateAutoscalingPolicy(UpdateAutoscalingPolicyRequest)
+      returns (AutoscalingPolicy) {
     option (google.api.http) = {
       put: "/v1beta2/{policy.name=projects/*/locations/*/autoscalingPolicies/*}"
       body: "policy"
@@ -68,7 +71,8 @@ service AutoscalingPolicyService {
   }

   // Retrieves autoscaling policy.
-  rpc GetAutoscalingPolicy(GetAutoscalingPolicyRequest) returns (AutoscalingPolicy) {
+  rpc GetAutoscalingPolicy(GetAutoscalingPolicyRequest)
+      returns (AutoscalingPolicy) {
     option (google.api.http) = {
       get: "/v1beta2/{name=projects/*/locations/*/autoscalingPolicies/*}"
       additional_bindings {
@@ -79,7 +83,8 @@ service AutoscalingPolicyService {
   }

   // Lists autoscaling policies in the project.
-  rpc ListAutoscalingPolicies(ListAutoscalingPoliciesRequest) returns (ListAutoscalingPoliciesResponse) {
+  rpc ListAutoscalingPolicies(ListAutoscalingPoliciesRequest)
+      returns (ListAutoscalingPoliciesResponse) {
     option (google.api.http) = {
       get: "/v1beta2/{parent=projects/*/locations/*}/autoscalingPolicies"
       additional_bindings {
@@ -91,7 +96,8 @@ service AutoscalingPolicyService {

   // Deletes an autoscaling policy. It is an error to delete an autoscaling
   // policy that is in use by one or more clusters.
-  rpc DeleteAutoscalingPolicy(DeleteAutoscalingPolicyRequest) returns (google.protobuf.Empty) {
+  rpc DeleteAutoscalingPolicy(DeleteAutoscalingPolicyRequest)
+      returns (google.protobuf.Empty) {
     option (google.api.http) = {
       delete: "/v1beta2/{name=projects/*/locations/*/autoscalingPolicies/*}"
       additional_bindings {
@@ -136,22 +142,26 @@ message AutoscalingPolicy {
   }

   // Required. Describes how the autoscaler will operate for primary workers.
-  InstanceGroupAutoscalingPolicyConfig worker_config = 4 [(google.api.field_behavior) = REQUIRED];
+  InstanceGroupAutoscalingPolicyConfig worker_config = 4
+      [(google.api.field_behavior) = REQUIRED];

   // Optional. Describes how the autoscaler will operate for secondary workers.
-  InstanceGroupAutoscalingPolicyConfig secondary_worker_config = 5 [(google.api.field_behavior) = OPTIONAL];
+  InstanceGroupAutoscalingPolicyConfig secondary_worker_config = 5
+      [(google.api.field_behavior) = OPTIONAL];
 }

 // Basic algorithm for autoscaling.
 message BasicAutoscalingAlgorithm {
   // Required. YARN autoscaling configuration.
-  BasicYarnAutoscalingConfig yarn_config = 1 [(google.api.field_behavior) = REQUIRED];
+  BasicYarnAutoscalingConfig yarn_config = 1
+      [(google.api.field_behavior) = REQUIRED];

   // Optional. Duration between scaling events. A scaling period starts after
   // the update operation from the previous event has completed.
   //
   // Bounds: [2m, 1d]. Default: 2m.
-  google.protobuf.Duration cooldown_period = 2 [(google.api.field_behavior) = OPTIONAL];
+  google.protobuf.Duration cooldown_period = 2
+      [(google.api.field_behavior) = OPTIONAL];
 }

 // Basic autoscaling configurations for YARN.
@@ -162,22 +172,29 @@ message BasicYarnAutoscalingConfig {
   // downscaling operations.
   //
   // Bounds: [0s, 1d].
-  google.protobuf.Duration graceful_decommission_timeout = 5 [(google.api.field_behavior) = REQUIRED];
-
-  // Required. Fraction of average pending memory in the last cooldown period
-  // for which to add workers. A scale-up factor of 1.0 will result in scaling
-  // up so that there is no pending memory remaining after the update (more
-  // aggressive scaling). A scale-up factor closer to 0 will result in a smaller
-  // magnitude of scaling up (less aggressive scaling).
+  google.protobuf.Duration graceful_decommission_timeout = 5
+      [(google.api.field_behavior) = REQUIRED];
+
+  // Required. Fraction of average YARN pending memory in the last cooldown
+  // period for which to add workers. A scale-up factor of 1.0 will result in
+  // scaling up so that there is no pending memory remaining after the update
+  // (more aggressive scaling). A scale-up factor closer to 0 will result in a
+  // smaller magnitude of scaling up (less aggressive scaling). See [How
+  // autoscaling
+  // works](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/autoscaling#how_autoscaling_works)
+  // for more information.
   //
   // Bounds: [0.0, 1.0].
   double scale_up_factor = 1 [(google.api.field_behavior) = REQUIRED];

-  // Required. Fraction of average pending memory in the last cooldown period
-  // for which to remove workers. A scale-down factor of 1 will result in
+  // Required. Fraction of average YARN pending memory in the last cooldown
+  // period for which to remove workers. A scale-down factor of 1 will result in
   // scaling down so that there is no available memory remaining after the
   // update (more aggressive scaling). A scale-down factor of 0 disables
   // removing workers, which can be beneficial for autoscaling a single job.
+  // See [How autoscaling
+  // works](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/autoscaling#how_autoscaling_works)
+  // for more information.
   //
   // Bounds: [0.0, 1.0].
   double scale_down_factor = 2 [(google.api.field_behavior) = REQUIRED];
@@ -189,7 +206,8 @@ message BasicYarnAutoscalingConfig {
   // on any recommended change.
   //
   // Bounds: [0.0, 1.0]. Default: 0.0.
-  double scale_up_min_worker_fraction = 3 [(google.api.field_behavior) = OPTIONAL];
+  double scale_up_min_worker_fraction = 3
+      [(google.api.field_behavior) = OPTIONAL];

   // Optional. Minimum scale-down threshold as a fraction of total cluster size
   // before scaling occurs. For example, in a 20-worker cluster, a threshold of
@@ -198,7 +216,8 @@ message BasicYarnAutoscalingConfig {
   // on any recommended change.
   //
   // Bounds: [0.0, 1.0]. Default: 0.0.
-  double scale_down_min_worker_fraction = 4 [(google.api.field_behavior) = OPTIONAL];
+  double scale_down_min_worker_fraction = 4
+      [(google.api.field_behavior) = OPTIONAL];
 }

 // Configuration for the size bounds of an instance group, including its
@@ -341,7 +360,8 @@ message ListAutoscalingPoliciesRequest {
 // A response to a request to list autoscaling policies in a project.
 message ListAutoscalingPoliciesResponse {
   // Output only. Autoscaling policies list.
-  repeated AutoscalingPolicy policies = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
+  repeated AutoscalingPolicy policies = 1
+      [(google.api.field_behavior) = OUTPUT_ONLY];

   // Output only. This token is included in the response if there are more
   // results to fetch.
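
The bounds documented above fully constrain a valid policy. A hedged sketch of one (the policy ID and sizes are invented for illustration):

from google.cloud import dataproc_v1beta2
from google.protobuf import duration_pb2

policy = dataproc_v1beta2.AutoscalingPolicy(
    id="example-policy",
    basic_algorithm=dataproc_v1beta2.BasicAutoscalingAlgorithm(
        # Bounds: [2m, 1d]; defaults to 2m if omitted.
        cooldown_period=duration_pb2.Duration(seconds=120),
        yarn_config=dataproc_v1beta2.BasicYarnAutoscalingConfig(
            # Bounds: [0s, 1d].
            graceful_decommission_timeout=duration_pb2.Duration(seconds=3600),
            scale_up_factor=0.5,    # in [0.0, 1.0]; closer to 0 = gentler scale-up
            scale_down_factor=1.0,  # in [0.0, 1.0]; 1.0 = most aggressive scale-down
        ),
    ),
    worker_config=dataproc_v1beta2.InstanceGroupAutoscalingPolicyConfig(
        max_instances=10,
    ),
)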
45 changes: 43 additions & 2 deletions google/cloud/dataproc_v1beta2/proto/clusters.proto
@@ -171,6 +171,17 @@ message ClusterConfig {
   // bucket](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)).
   string config_bucket = 1 [(google.api.field_behavior) = OPTIONAL];

+  // Optional. A Cloud Storage bucket used to store ephemeral cluster and jobs data,
+  // such as Spark and MapReduce history files.
+  // If you do not specify a temp bucket,
+  // Dataproc will determine a Cloud Storage location (US,
+  // ASIA, or EU) for your cluster's temp bucket according to the
+  // Compute Engine zone where your cluster is deployed, and then create
+  // and manage this project-level, per-location bucket. The default bucket has
+  // a TTL of 90 days, but you can use any TTL (or none) if you specify a
+  // bucket.
+  string temp_bucket = 2 [(google.api.field_behavior) = OPTIONAL];
+
   // Optional. The shared Compute Engine config settings for
   // all instances in a cluster.
   GceClusterConfig gce_cluster_config = 8 [(google.api.field_behavior) = OPTIONAL];
@@ -330,7 +341,7 @@ message GceClusterConfig {
   bool internal_ip_only = 7 [(google.api.field_behavior) = OPTIONAL];

   // Optional. The [Dataproc service
-  // account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_cloud_dataproc)
+  // account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_dataproc)
   // (also see [VM Data Plane
   // identity](https://cloud.google.com/dataproc/docs/concepts/iam/dataproc-principals#vm_service_account_data_plane_identity))
   // used by Dataproc cluster VM instances to access Google Cloud Platform
@@ -374,6 +385,27 @@ message GceClusterConfig {
 // The config settings for Compute Engine resources in
 // an instance group, such as a master or worker group.
 message InstanceGroupConfig {
+  // Controls the use of
+  // [preemptible instances]
+  // (https://cloud.google.com/compute/docs/instances/preemptible)
+  // within the group.
+  enum Preemptibility {
+    // Preemptibility is unspecified, the system will choose the
+    // appropriate setting for each instance group.
+    PREEMPTIBILITY_UNSPECIFIED = 0;
+
+    // Instances are non-preemptible.
+    //
+    // This option is allowed for all instance groups and is the only valid
+    // value for Master and Worker instance groups.
+    NON_PREEMPTIBLE = 1;
+
+    // Instances are preemptible.
+    //
+    // This option is allowed only for secondary worker groups.
+    PREEMPTIBLE = 2;
+  }
+
   // Optional. The number of VM instances in the instance group.
   // For master instance groups, must be set to 1.
   int32 num_instances = 1 [(google.api.field_behavior) = OPTIONAL];
@@ -424,6 +456,15 @@ message InstanceGroupConfig {
   // instances.
   bool is_preemptible = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

+  // Optional. Specifies the preemptibility of the instance group.
+  //
+  // The default value for master and worker groups is
+  // `NON_PREEMPTIBLE`. This default cannot be changed.
+  //
+  // The default value for secondary instances is
+  // `PREEMPTIBLE`.
+  Preemptibility preemptibility = 10 [(google.api.field_behavior) = OPTIONAL];
+
   // Output only. The config for Compute Engine Instance Group
   // Manager that manages this group.
   // This is only used for preemptible instance groups.
@@ -685,7 +726,7 @@ message ClusterStatus {
 message SoftwareConfig {
   // Optional. The version of software inside the cluster. It must be one of the
   // supported [Dataproc
-  // Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_cloud_dataproc_versions),
+  // Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_dataproc_versions),
   // such as "1.2" (including a subminor version, such as "1.2.29"), or the
   // ["preview"
   // version](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#other_versions).
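
The new `Preemptibility` enum encodes the constraint its comments spell out: `PREEMPTIBLE` is legal only for secondary worker groups, while master and worker groups are fixed at `NON_PREEMPTIBLE`. A hypothetical configuration that respects the rule:

from google.cloud import dataproc_v1beta2

Preemptibility = dataproc_v1beta2.InstanceGroupConfig.Preemptibility

config = dataproc_v1beta2.ClusterConfig(
    master_config=dataproc_v1beta2.InstanceGroupConfig(
        num_instances=1,  # master groups must have exactly one instance
        preemptibility=Preemptibility.NON_PREEMPTIBLE,  # the only valid choice here
    ),
    secondary_worker_config=dataproc_v1beta2.InstanceGroupConfig(
        num_instances=4,
        preemptibility=Preemptibility.PREEMPTIBLE,  # allowed only for secondaries
    ),
)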
29 changes: 15 additions & 14 deletions google/cloud/dataproc_v1beta2/proto/jobs.proto
@@ -224,12 +224,12 @@ message SparkJob {
   // Spark driver and tasks.
   repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

-  // Optional. HCFS URIs of files to be copied to the working directory of
-  // Spark drivers and distributed tasks. Useful for naively parallel tasks.
+  // Optional. HCFS URIs of files to be placed in the working directory of
+  // each executor. Useful for naively parallel tasks.
   repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

-  // Optional. HCFS URIs of archives to be extracted in the working directory
-  // of Spark drivers and tasks. Supported file types:
+  // Optional. HCFS URIs of archives to be extracted into the working directory
+  // of each executor. Supported file types:
   // .jar, .tar, .tar.gz, .tgz, and .zip.
   repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];

@@ -265,11 +265,12 @@ message PySparkJob {
   // Python driver and tasks.
   repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

-  // Optional. HCFS URIs of files to be copied to the working directory of
-  // Python drivers and distributed tasks. Useful for naively parallel tasks.
+  // Optional. HCFS URIs of files to be placed in the working directory of
+  // each executor. Useful for naively parallel tasks.
   repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

-  // Optional. HCFS URIs of archives to be extracted in the working directory of
+  // Optional. HCFS URIs of archives to be extracted into the working directory
+  // of each executor. Supported file types:
   // .jar, .tar, .tar.gz, .tgz, and .zip.
   repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];

@@ -414,12 +415,12 @@ message SparkRJob {
   // occur that causes an incorrect job submission.
   repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];

-  // Optional. HCFS URIs of files to be copied to the working directory of
-  // R drivers and distributed tasks. Useful for naively parallel tasks.
+  // Optional. HCFS URIs of files to be placed in the working directory of
+  // each executor. Useful for naively parallel tasks.
   repeated string file_uris = 3 [(google.api.field_behavior) = OPTIONAL];

-  // Optional. HCFS URIs of archives to be extracted in the working directory of
-  // Spark drivers and tasks. Supported file types:
+  // Optional. HCFS URIs of archives to be extracted into the working directory
+  // of each executor. Supported file types:
   // .jar, .tar, .tar.gz, .tgz, and .zip.
   repeated string archive_uris = 4 [(google.api.field_behavior) = OPTIONAL];

@@ -562,9 +563,9 @@ message JobStatus {

 // Encapsulates the full scoping used to reference a job.
 message JobReference {
-  // Required. The ID of the Google Cloud Platform project that the job
-  // belongs to.
-  string project_id = 1 [(google.api.field_behavior) = REQUIRED];
+  // Optional. The ID of the Google Cloud Platform project that the job belongs to. If
+  // specified, must match the request project ID.
+  string project_id = 1 [(google.api.field_behavior) = OPTIONAL];

   // Optional. The job ID, which must be unique within the project.
   // The ID must contain only letters (a-z, A-Z), numbers (0-9),
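
With `project_id` on `JobReference` relaxed to optional, a caller can pin only the job ID and let the service take the project from the request. A sketch, assuming the flattened keyword surface of the generated `JobControllerClient`; all names are placeholders:

from google.cloud import dataproc_v1beta2

job = dataproc_v1beta2.Job(
    placement=dataproc_v1beta2.JobPlacement(cluster_name="my-cluster"),
    # project_id may now be omitted; if set, it must match the request project.
    reference=dataproc_v1beta2.JobReference(job_id="word-count-001"),
    pyspark_job=dataproc_v1beta2.PySparkJob(
        main_python_file_uri="gs://my-bucket/word_count.py",
    ),
)

client = dataproc_v1beta2.JobControllerClient()
client.submit_job(project_id="my-project", region="us-central1", job=job)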
9 changes: 3 additions & 6 deletions google/cloud/dataproc_v1beta2/proto/shared.proto
@@ -25,20 +25,17 @@ option java_package = "com.google.cloud.dataproc.v1beta2";

 // Cluster components that can be activated.
 enum Component {
-  // Unspecified component.
+  // Unspecified component. Specifying this will cause Cluster creation to fail.
   COMPONENT_UNSPECIFIED = 0;

   // The Anaconda python distribution.
   ANACONDA = 5;

-  // Docker
-  DOCKER = 13;
-
   // The Druid query engine.
   DRUID = 9;

-  // Flink
-  FLINK = 14;
+  // HBase.
+  HBASE = 11;

   // The Hive Web HCatalog (the REST service for accessing HCatalog).
   HIVE_WEBHCAT = 3;
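Since `DOCKER` and `FLINK` are removed outright rather than deprecated, code that persists component names should validate them before building a `SoftwareConfig`. A migration sketch, assuming `Component` is re-exported at the package root and behaves like a standard `enum.IntEnum` (as proto-plus enums do):

from google.cloud import dataproc_v1beta2

requested = ["ANACONDA", "HBASE", "DOCKER"]  # "DOCKER" no longer exists in v1beta2
valid = [n for n in requested if n in dataproc_v1beta2.Component.__members__]

software = dataproc_v1beta2.SoftwareConfig(
    optional_components=[dataproc_v1beta2.Component[n] for n in valid],
)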
