
feat(v1beta2): remove DOCKER/FLINK from Component enum; add HBASE (#108)

Breaking change in v1beta2:

1. The `DOCKER` and `FLINK` values have been removed from the `Component` enum, and an `HBASE` value was added.

Other changes:

1. There is a new `temp_bucket` field in `ClusterConfig` (see the sketch after this list).
2. There is a new `preemptibility` field in `InstanceGroupConfig`.
3. The `project_id` field of `JobReference` is now optional instead of required.
4. There is a new `dag_timeout` field in `WorkflowTemplate`.
5. There are new `dag_timeout`, `dag_start_time`, and `dag_end_time` fields in `WorkflowMetadata`.
6. There are various updates to the doc comments.
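
A minimal sketch of items 1 and 4 against the regenerated `dataproc_v1beta2` surface; the bucket and template IDs below are placeholders, not values from this commit:

from google.cloud import dataproc_v1beta2
from google.protobuf import duration_pb2

config = dataproc_v1beta2.ClusterConfig(
    # Item 1: name your own temp bucket instead of the Dataproc-managed default.
    temp_bucket="my-temp-bucket",
)
template = dataproc_v1beta2.WorkflowTemplate(
    id="my-workflow",
    # Item 4: cancel the workflow DAG if it runs longer than 30 minutes.
    dag_timeout=duration_pb2.Duration(seconds=1800),
)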
yoshi-automation committed Dec 28, 2020
1 parent 8e96bdd commit ee093a8
Showing 15 changed files with 510 additions and 134 deletions.
4 changes: 2 additions & 2 deletions google/cloud/dataproc_v1/__init__.py
@@ -115,7 +115,6 @@
     "CancelJobRequest",
     "Cluster",
     "ClusterConfig",
-    "ClusterControllerClient",
     "ClusterMetrics",
     "ClusterOperation",
     "ClusterOperationMetadata",
@@ -192,6 +191,7 @@
     "WorkflowNode",
     "WorkflowTemplate",
     "WorkflowTemplatePlacement",
+    "YarnApplication",
     "WorkflowTemplateServiceClient",
-    "YarnApplication",
+    "ClusterControllerClient",
 )
4 changes: 2 additions & 2 deletions google/cloud/dataproc_v1beta2/__init__.py
@@ -149,7 +149,6 @@
     "InstantiateInlineWorkflowTemplateRequest",
     "InstantiateWorkflowTemplateRequest",
     "Job",
-    "JobControllerClient",
     "JobMetadata",
     "JobPlacement",
     "JobReference",
@@ -194,6 +193,7 @@
     "WorkflowNode",
     "WorkflowTemplate",
     "WorkflowTemplatePlacement",
+    "YarnApplication",
     "WorkflowTemplateServiceClient",
-    "YarnApplication",
+    "JobControllerClient",
 )
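
The `__init__.py` hunks above only reshuffle the export order; no name leaves the public surface. Both of these imports keep working exactly as before (a sanity check grounded in the `__all__` lists shown):

from google.cloud.dataproc_v1 import ClusterControllerClient, ClusterConfig
from google.cloud.dataproc_v1beta2 import JobControllerClient, YarnApplication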
64 changes: 42 additions & 22 deletions google/cloud/dataproc_v1beta2/proto/autoscaling_policies.proto
@@ -36,10 +36,12 @@ option (google.api.resource_definition) = {
 // Cloud Dataproc API.
 service AutoscalingPolicyService {
   option (google.api.default_host) = "dataproc.googleapis.com";
-  option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";
+  option (google.api.oauth_scopes) =
+      "https://www.googleapis.com/auth/cloud-platform";

   // Creates new autoscaling policy.
-  rpc CreateAutoscalingPolicy(CreateAutoscalingPolicyRequest) returns (AutoscalingPolicy) {
+  rpc CreateAutoscalingPolicy(CreateAutoscalingPolicyRequest)
+      returns (AutoscalingPolicy) {
     option (google.api.http) = {
       post: "/v1beta2/{parent=projects/*/locations/*}/autoscalingPolicies"
       body: "policy"
@@ -55,7 +57,8 @@ service AutoscalingPolicyService {
   //
   // Disabled check for update_mask, because all updates will be full
   // replacements.
-  rpc UpdateAutoscalingPolicy(UpdateAutoscalingPolicyRequest) returns (AutoscalingPolicy) {
+  rpc UpdateAutoscalingPolicy(UpdateAutoscalingPolicyRequest)
+      returns (AutoscalingPolicy) {
     option (google.api.http) = {
       put: "/v1beta2/{policy.name=projects/*/locations/*/autoscalingPolicies/*}"
       body: "policy"
@@ -68,7 +71,8 @@ service AutoscalingPolicyService {
   }

   // Retrieves autoscaling policy.
-  rpc GetAutoscalingPolicy(GetAutoscalingPolicyRequest) returns (AutoscalingPolicy) {
+  rpc GetAutoscalingPolicy(GetAutoscalingPolicyRequest)
+      returns (AutoscalingPolicy) {
     option (google.api.http) = {
       get: "/v1beta2/{name=projects/*/locations/*/autoscalingPolicies/*}"
       additional_bindings {
@@ -79,7 +83,8 @@ service AutoscalingPolicyService {
   }

   // Lists autoscaling policies in the project.
-  rpc ListAutoscalingPolicies(ListAutoscalingPoliciesRequest) returns (ListAutoscalingPoliciesResponse) {
+  rpc ListAutoscalingPolicies(ListAutoscalingPoliciesRequest)
+      returns (ListAutoscalingPoliciesResponse) {
     option (google.api.http) = {
       get: "/v1beta2/{parent=projects/*/locations/*}/autoscalingPolicies"
       additional_bindings {
@@ -91,7 +96,8 @@ service AutoscalingPolicyService {

   // Deletes an autoscaling policy. It is an error to delete an autoscaling
   // policy that is in use by one or more clusters.
-  rpc DeleteAutoscalingPolicy(DeleteAutoscalingPolicyRequest) returns (google.protobuf.Empty) {
+  rpc DeleteAutoscalingPolicy(DeleteAutoscalingPolicyRequest)
+      returns (google.protobuf.Empty) {
     option (google.api.http) = {
       delete: "/v1beta2/{name=projects/*/locations/*/autoscalingPolicies/*}"
       additional_bindings {
@@ -136,22 +142,26 @@ message AutoscalingPolicy {
   }

   // Required. Describes how the autoscaler will operate for primary workers.
-  InstanceGroupAutoscalingPolicyConfig worker_config = 4 [(google.api.field_behavior) = REQUIRED];
+  InstanceGroupAutoscalingPolicyConfig worker_config = 4
+      [(google.api.field_behavior) = REQUIRED];

   // Optional. Describes how the autoscaler will operate for secondary workers.
-  InstanceGroupAutoscalingPolicyConfig secondary_worker_config = 5 [(google.api.field_behavior) = OPTIONAL];
+  InstanceGroupAutoscalingPolicyConfig secondary_worker_config = 5
+      [(google.api.field_behavior) = OPTIONAL];
 }

 // Basic algorithm for autoscaling.
 message BasicAutoscalingAlgorithm {
   // Required. YARN autoscaling configuration.
-  BasicYarnAutoscalingConfig yarn_config = 1 [(google.api.field_behavior) = REQUIRED];
+  BasicYarnAutoscalingConfig yarn_config = 1
+      [(google.api.field_behavior) = REQUIRED];

   // Optional. Duration between scaling events. A scaling period starts after
   // the update operation from the previous event has completed.
   //
   // Bounds: [2m, 1d]. Default: 2m.
-  google.protobuf.Duration cooldown_period = 2 [(google.api.field_behavior) = OPTIONAL];
+  google.protobuf.Duration cooldown_period = 2
+      [(google.api.field_behavior) = OPTIONAL];
 }

 // Basic autoscaling configurations for YARN.
@@ -162,22 +172,29 @@ message BasicYarnAutoscalingConfig {
   // downscaling operations.
   //
   // Bounds: [0s, 1d].
-  google.protobuf.Duration graceful_decommission_timeout = 5 [(google.api.field_behavior) = REQUIRED];
-
-  // Required. Fraction of average pending memory in the last cooldown period
-  // for which to add workers. A scale-up factor of 1.0 will result in scaling
-  // up so that there is no pending memory remaining after the update (more
-  // aggressive scaling). A scale-up factor closer to 0 will result in a smaller
-  // magnitude of scaling up (less aggressive scaling).
+  google.protobuf.Duration graceful_decommission_timeout = 5
+      [(google.api.field_behavior) = REQUIRED];
+
+  // Required. Fraction of average YARN pending memory in the last cooldown
+  // period for which to add workers. A scale-up factor of 1.0 will result in
+  // scaling up so that there is no pending memory remaining after the update
+  // (more aggressive scaling). A scale-up factor closer to 0 will result in a
+  // smaller magnitude of scaling up (less aggressive scaling). See [How
+  // autoscaling
+  // works](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/autoscaling#how_autoscaling_works)
+  // for more information.
   //
   // Bounds: [0.0, 1.0].
   double scale_up_factor = 1 [(google.api.field_behavior) = REQUIRED];

-  // Required. Fraction of average pending memory in the last cooldown period
-  // for which to remove workers. A scale-down factor of 1 will result in
+  // Required. Fraction of average YARN pending memory in the last cooldown
+  // period for which to remove workers. A scale-down factor of 1 will result in
   // scaling down so that there is no available memory remaining after the
   // update (more aggressive scaling). A scale-down factor of 0 disables
   // removing workers, which can be beneficial for autoscaling a single job.
+  // See [How autoscaling
+  // works](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/autoscaling#how_autoscaling_works)
+  // for more information.
   //
   // Bounds: [0.0, 1.0].
   double scale_down_factor = 2 [(google.api.field_behavior) = REQUIRED];
@@ -189,7 +206,8 @@ message BasicYarnAutoscalingConfig {
   // on any recommended change.
   //
   // Bounds: [0.0, 1.0]. Default: 0.0.
-  double scale_up_min_worker_fraction = 3 [(google.api.field_behavior) = OPTIONAL];
+  double scale_up_min_worker_fraction = 3
+      [(google.api.field_behavior) = OPTIONAL];

   // Optional. Minimum scale-down threshold as a fraction of total cluster size
   // before scaling occurs. For example, in a 20-worker cluster, a threshold of
@@ -198,7 +216,8 @@ message BasicYarnAutoscalingConfig {
   // on any recommended change.
   //
   // Bounds: [0.0, 1.0]. Default: 0.0.
-  double scale_down_min_worker_fraction = 4 [(google.api.field_behavior) = OPTIONAL];
+  double scale_down_min_worker_fraction = 4
+      [(google.api.field_behavior) = OPTIONAL];
 }

 // Configuration for the size bounds of an instance group, including its
@@ -341,7 +360,8 @@ message ListAutoscalingPoliciesRequest {
 // A response to a request to list autoscaling policies in a project.
 message ListAutoscalingPoliciesResponse {
   // Output only. Autoscaling policies list.
-  repeated AutoscalingPolicy policies = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
+  repeated AutoscalingPolicy policies = 1
+      [(google.api.field_behavior) = OUTPUT_ONLY];

   // Output only. This token is included in the response if there are more
   // results to fetch.
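
The bounds documented above fully constrain a valid policy. A hedged sketch of one (the policy ID and sizes are invented for illustration):

from google.cloud import dataproc_v1beta2
from google.protobuf import duration_pb2

policy = dataproc_v1beta2.AutoscalingPolicy(
    id="example-policy",
    basic_algorithm=dataproc_v1beta2.BasicAutoscalingAlgorithm(
        # Bounds: [2m, 1d]; defaults to 2m if omitted.
        cooldown_period=duration_pb2.Duration(seconds=120),
        yarn_config=dataproc_v1beta2.BasicYarnAutoscalingConfig(
            # Bounds: [0s, 1d].
            graceful_decommission_timeout=duration_pb2.Duration(seconds=3600),
            scale_up_factor=0.5,    # in [0.0, 1.0]; closer to 0 = gentler scale-up
            scale_down_factor=1.0,  # in [0.0, 1.0]; 1.0 = most aggressive scale-down
        ),
    ),
    worker_config=dataproc_v1beta2.InstanceGroupAutoscalingPolicyConfig(
        max_instances=10,
    ),
)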
45 changes: 43 additions & 2 deletions google/cloud/dataproc_v1beta2/proto/clusters.proto
@@ -171,6 +171,17 @@ message ClusterConfig {
   // bucket](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)).
   string config_bucket = 1 [(google.api.field_behavior) = OPTIONAL];

+  // Optional. A Cloud Storage bucket used to store ephemeral cluster and jobs data,
+  // such as Spark and MapReduce history files.
+  // If you do not specify a temp bucket,
+  // Dataproc will determine a Cloud Storage location (US,
+  // ASIA, or EU) for your cluster's temp bucket according to the
+  // Compute Engine zone where your cluster is deployed, and then create
+  // and manage this project-level, per-location bucket. The default bucket has
+  // a TTL of 90 days, but you can use any TTL (or none) if you specify a
+  // bucket.
+  string temp_bucket = 2 [(google.api.field_behavior) = OPTIONAL];
+
   // Optional. The shared Compute Engine config settings for
   // all instances in a cluster.
   GceClusterConfig gce_cluster_config = 8 [(google.api.field_behavior) = OPTIONAL];
@@ -330,7 +341,7 @@ message GceClusterConfig {
   bool internal_ip_only = 7 [(google.api.field_behavior) = OPTIONAL];

   // Optional. The [Dataproc service
-  // account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_cloud_dataproc)
+  // account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_dataproc)
   // (also see [VM Data Plane
   // identity](https://cloud.google.com/dataproc/docs/concepts/iam/dataproc-principals#vm_service_account_data_plane_identity))
   // used by Dataproc cluster VM instances to access Google Cloud Platform
@@ -374,6 +385,27 @@ message GceClusterConfig {
 // The config settings for Compute Engine resources in
 // an instance group, such as a master or worker group.
 message InstanceGroupConfig {
+  // Controls the use of
+  // [preemptible instances]
+  // (https://cloud.google.com/compute/docs/instances/preemptible)
+  // within the group.
+  enum Preemptibility {
+    // Preemptibility is unspecified, the system will choose the
+    // appropriate setting for each instance group.
+    PREEMPTIBILITY_UNSPECIFIED = 0;
+
+    // Instances are non-preemptible.
+    //
+    // This option is allowed for all instance groups and is the only valid
+    // value for Master and Worker instance groups.
+    NON_PREEMPTIBLE = 1;
+
+    // Instances are preemptible.
+    //
+    // This option is allowed only for secondary worker groups.
+    PREEMPTIBLE = 2;
+  }
+
   // Optional. The number of VM instances in the instance group.
   // For master instance groups, must be set to 1.
   int32 num_instances = 1 [(google.api.field_behavior) = OPTIONAL];
@@ -424,6 +456,15 @@ message InstanceGroupConfig {
   // instances.
   bool is_preemptible = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

+  // Optional. Specifies the preemptibility of the instance group.
+  //
+  // The default value for master and worker groups is
+  // `NON_PREEMPTIBLE`. This default cannot be changed.
+  //
+  // The default value for secondary instances is
+  // `PREEMPTIBLE`.
+  Preemptibility preemptibility = 10 [(google.api.field_behavior) = OPTIONAL];
+
   // Output only. The config for Compute Engine Instance Group
   // Manager that manages this group.
   // This is only used for preemptible instance groups.
@@ -685,7 +726,7 @@ message ClusterStatus {
 message SoftwareConfig {
   // Optional. The version of software inside the cluster. It must be one of the
   // supported [Dataproc
-  // Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_cloud_dataproc_versions),
+  // Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_dataproc_versions),
   // such as "1.2" (including a subminor version, such as "1.2.29"), or the
   // ["preview"
   // version](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#other_versions).
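
The new `Preemptibility` enum encodes the constraint its comments spell out: `PREEMPTIBLE` is legal only for secondary worker groups, while master and worker groups are fixed at `NON_PREEMPTIBLE`. A hypothetical configuration that respects the rule:

from google.cloud import dataproc_v1beta2

Preemptibility = dataproc_v1beta2.InstanceGroupConfig.Preemptibility

config = dataproc_v1beta2.ClusterConfig(
    master_config=dataproc_v1beta2.InstanceGroupConfig(
        num_instances=1,  # master groups must have exactly one instance
        preemptibility=Preemptibility.NON_PREEMPTIBLE,  # the only valid choice here
    ),
    secondary_worker_config=dataproc_v1beta2.InstanceGroupConfig(
        num_instances=4,
        preemptibility=Preemptibility.PREEMPTIBLE,  # allowed only for secondaries
    ),
)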
29 changes: 15 additions & 14 deletions google/cloud/dataproc_v1beta2/proto/jobs.proto
@@ -224,12 +224,12 @@ message SparkJob {
   // Spark driver and tasks.
   repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

-  // Optional. HCFS URIs of files to be copied to the working directory of
-  // Spark drivers and distributed tasks. Useful for naively parallel tasks.
+  // Optional. HCFS URIs of files to be placed in the working directory of
+  // each executor. Useful for naively parallel tasks.
   repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

-  // Optional. HCFS URIs of archives to be extracted in the working directory
-  // of Spark drivers and tasks. Supported file types:
+  // Optional. HCFS URIs of archives to be extracted into the working directory
+  // of each executor. Supported file types:
   // .jar, .tar, .tar.gz, .tgz, and .zip.
   repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];

@@ -265,11 +265,12 @@ message PySparkJob {
   // Python driver and tasks.
   repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

-  // Optional. HCFS URIs of files to be copied to the working directory of
-  // Python drivers and distributed tasks. Useful for naively parallel tasks.
+  // Optional. HCFS URIs of files to be placed in the working directory of
+  // each executor. Useful for naively parallel tasks.
   repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

-  // Optional. HCFS URIs of archives to be extracted in the working directory of
+  // Optional. HCFS URIs of archives to be extracted into the working directory
+  // of each executor. Supported file types:
   // .jar, .tar, .tar.gz, .tgz, and .zip.
   repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];

@@ -414,12 +415,12 @@ message SparkRJob {
   // occur that causes an incorrect job submission.
   repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];

-  // Optional. HCFS URIs of files to be copied to the working directory of
-  // R drivers and distributed tasks. Useful for naively parallel tasks.
+  // Optional. HCFS URIs of files to be placed in the working directory of
+  // each executor. Useful for naively parallel tasks.
   repeated string file_uris = 3 [(google.api.field_behavior) = OPTIONAL];

-  // Optional. HCFS URIs of archives to be extracted in the working directory of
-  // Spark drivers and tasks. Supported file types:
+  // Optional. HCFS URIs of archives to be extracted into the working directory
+  // of each executor. Supported file types:
   // .jar, .tar, .tar.gz, .tgz, and .zip.
   repeated string archive_uris = 4 [(google.api.field_behavior) = OPTIONAL];

@@ -562,9 +563,9 @@ message JobStatus {

 // Encapsulates the full scoping used to reference a job.
 message JobReference {
-  // Required. The ID of the Google Cloud Platform project that the job
-  // belongs to.
-  string project_id = 1 [(google.api.field_behavior) = REQUIRED];
+  // Optional. The ID of the Google Cloud Platform project that the job belongs to. If
+  // specified, must match the request project ID.
+  string project_id = 1 [(google.api.field_behavior) = OPTIONAL];

   // Optional. The job ID, which must be unique within the project.
   // The ID must contain only letters (a-z, A-Z), numbers (0-9),
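
With `project_id` on `JobReference` relaxed to optional, a caller can pin only the job ID and let the service take the project from the request. A sketch, assuming the flattened keyword surface of the generated `JobControllerClient`; all names are placeholders:

from google.cloud import dataproc_v1beta2

job = dataproc_v1beta2.Job(
    placement=dataproc_v1beta2.JobPlacement(cluster_name="my-cluster"),
    # project_id may now be omitted; if set, it must match the request project.
    reference=dataproc_v1beta2.JobReference(job_id="word-count-001"),
    pyspark_job=dataproc_v1beta2.PySparkJob(
        main_python_file_uri="gs://my-bucket/word_count.py",
    ),
)

client = dataproc_v1beta2.JobControllerClient()
client.submit_job(project_id="my-project", region="us-central1", job=job)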
9 changes: 3 additions & 6 deletions google/cloud/dataproc_v1beta2/proto/shared.proto
@@ -25,20 +25,17 @@ option java_package = "com.google.cloud.dataproc.v1beta2";

 // Cluster components that can be activated.
 enum Component {
-  // Unspecified component.
+  // Unspecified component. Specifying this will cause Cluster creation to fail.
   COMPONENT_UNSPECIFIED = 0;

   // The Anaconda python distribution.
   ANACONDA = 5;

-  // Docker
-  DOCKER = 13;
-
   // The Druid query engine.
   DRUID = 9;

-  // Flink
-  FLINK = 14;
+  // HBase.
+  HBASE = 11;

   // The Hive Web HCatalog (the REST service for accessing HCatalog).
   HIVE_WEBHCAT = 3;
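Since `DOCKER` and `FLINK` are removed outright rather than deprecated, code that persists component names should validate them before building a `SoftwareConfig`. A migration sketch, assuming `Component` is re-exported at the package root and behaves like a standard `enum.IntEnum` (as proto-plus enums do):

from google.cloud import dataproc_v1beta2

requested = ["ANACONDA", "HBASE", "DOCKER"]  # "DOCKER" no longer exists in v1beta2
valid = [n for n in requested if n in dataproc_v1beta2.Component.__members__]

software = dataproc_v1beta2.SoftwareConfig(
    optional_components=[dataproc_v1beta2.Component[n] for n in valid],
)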
