This repository has been archived by the owner on Nov 29, 2023. It is now read-only.

feat(v1beta2): remove DOCKER/FLINK from Component enum; adds HBASE #108

Merged
merged 4 commits on Dec 28, 2020
4 changes: 2 additions & 2 deletions google/cloud/dataproc_v1/__init__.py
@@ -115,7 +115,6 @@
"CancelJobRequest",
"Cluster",
"ClusterConfig",
- "ClusterControllerClient",
"ClusterMetrics",
"ClusterOperation",
"ClusterOperationMetadata",
@@ -192,6 +191,7 @@
"WorkflowNode",
"WorkflowTemplate",
"WorkflowTemplatePlacement",
+ "YarnApplication",
"WorkflowTemplateServiceClient",
- "YarnApplication",
+ "ClusterControllerClient",
)
4 changes: 2 additions & 2 deletions google/cloud/dataproc_v1beta2/__init__.py
@@ -149,7 +149,6 @@
"InstantiateInlineWorkflowTemplateRequest",
"InstantiateWorkflowTemplateRequest",
"Job",
- "JobControllerClient",
"JobMetadata",
"JobPlacement",
"JobReference",
@@ -194,6 +193,7 @@
"WorkflowNode",
"WorkflowTemplate",
"WorkflowTemplatePlacement",
+ "YarnApplication",
"WorkflowTemplateServiceClient",
- "YarnApplication",
+ "JobControllerClient",
)
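
Both hunks only reorder the `__all__` tuples; the public import surface of each package is unchanged. A quick sanity check, assuming the library is installed as `google-cloud-dataproc`:

from google.cloud import dataproc_v1, dataproc_v1beta2

# Both names are still exported; only their position inside __all__ moved.
assert "ClusterControllerClient" in dataproc_v1.__all__
assert "JobControllerClient" in dataproc_v1beta2.__all__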
64 changes: 42 additions & 22 deletions google/cloud/dataproc_v1beta2/proto/autoscaling_policies.proto
@@ -36,10 +36,12 @@ option (google.api.resource_definition) = {
// Cloud Dataproc API.
service AutoscalingPolicyService {
option (google.api.default_host) = "dataproc.googleapis.com";
- option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";
+ option (google.api.oauth_scopes) =
+     "https://www.googleapis.com/auth/cloud-platform";

// Creates new autoscaling policy.
- rpc CreateAutoscalingPolicy(CreateAutoscalingPolicyRequest) returns (AutoscalingPolicy) {
+ rpc CreateAutoscalingPolicy(CreateAutoscalingPolicyRequest)
+     returns (AutoscalingPolicy) {
option (google.api.http) = {
post: "/v1beta2/{parent=projects/*/locations/*}/autoscalingPolicies"
body: "policy"
@@ -55,7 +57,8 @@ service AutoscalingPolicyService {
//
// Disabled check for update_mask, because all updates will be full
// replacements.
- rpc UpdateAutoscalingPolicy(UpdateAutoscalingPolicyRequest) returns (AutoscalingPolicy) {
+ rpc UpdateAutoscalingPolicy(UpdateAutoscalingPolicyRequest)
+     returns (AutoscalingPolicy) {
option (google.api.http) = {
put: "/v1beta2/{policy.name=projects/*/locations/*/autoscalingPolicies/*}"
body: "policy"
@@ -68,7 +71,8 @@ service AutoscalingPolicyService {
}

// Retrieves autoscaling policy.
- rpc GetAutoscalingPolicy(GetAutoscalingPolicyRequest) returns (AutoscalingPolicy) {
+ rpc GetAutoscalingPolicy(GetAutoscalingPolicyRequest)
+     returns (AutoscalingPolicy) {
option (google.api.http) = {
get: "/v1beta2/{name=projects/*/locations/*/autoscalingPolicies/*}"
additional_bindings {
@@ -79,7 +83,8 @@ service AutoscalingPolicyService {
}

// Lists autoscaling policies in the project.
- rpc ListAutoscalingPolicies(ListAutoscalingPoliciesRequest) returns (ListAutoscalingPoliciesResponse) {
+ rpc ListAutoscalingPolicies(ListAutoscalingPoliciesRequest)
+     returns (ListAutoscalingPoliciesResponse) {
option (google.api.http) = {
get: "/v1beta2/{parent=projects/*/locations/*}/autoscalingPolicies"
additional_bindings {
@@ -91,7 +96,8 @@ service AutoscalingPolicyService {

// Deletes an autoscaling policy. It is an error to delete an autoscaling
// policy that is in use by one or more clusters.
- rpc DeleteAutoscalingPolicy(DeleteAutoscalingPolicyRequest) returns (google.protobuf.Empty) {
+ rpc DeleteAutoscalingPolicy(DeleteAutoscalingPolicyRequest)
+     returns (google.protobuf.Empty) {
option (google.api.http) = {
delete: "/v1beta2/{name=projects/*/locations/*/autoscalingPolicies/*}"
additional_bindings {
@@ -136,22 +142,26 @@ message AutoscalingPolicy {
}

// Required. Describes how the autoscaler will operate for primary workers.
- InstanceGroupAutoscalingPolicyConfig worker_config = 4 [(google.api.field_behavior) = REQUIRED];
+ InstanceGroupAutoscalingPolicyConfig worker_config = 4
+     [(google.api.field_behavior) = REQUIRED];

// Optional. Describes how the autoscaler will operate for secondary workers.
- InstanceGroupAutoscalingPolicyConfig secondary_worker_config = 5 [(google.api.field_behavior) = OPTIONAL];
+ InstanceGroupAutoscalingPolicyConfig secondary_worker_config = 5
+     [(google.api.field_behavior) = OPTIONAL];
}

// Basic algorithm for autoscaling.
message BasicAutoscalingAlgorithm {
// Required. YARN autoscaling configuration.
- BasicYarnAutoscalingConfig yarn_config = 1 [(google.api.field_behavior) = REQUIRED];
+ BasicYarnAutoscalingConfig yarn_config = 1
+     [(google.api.field_behavior) = REQUIRED];

// Optional. Duration between scaling events. A scaling period starts after
// the update operation from the previous event has completed.
//
// Bounds: [2m, 1d]. Default: 2m.
- google.protobuf.Duration cooldown_period = 2 [(google.api.field_behavior) = OPTIONAL];
+ google.protobuf.Duration cooldown_period = 2
+     [(google.api.field_behavior) = OPTIONAL];
}

// Basic autoscaling configurations for YARN.
@@ -162,22 +172,29 @@ message BasicYarnAutoscalingConfig {
// downscaling operations.
//
// Bounds: [0s, 1d].
- google.protobuf.Duration graceful_decommission_timeout = 5 [(google.api.field_behavior) = REQUIRED];
-
- // Required. Fraction of average pending memory in the last cooldown period
- // for which to add workers. A scale-up factor of 1.0 will result in scaling
- // up so that there is no pending memory remaining after the update (more
- // aggressive scaling). A scale-up factor closer to 0 will result in a smaller
- // magnitude of scaling up (less aggressive scaling).
+ google.protobuf.Duration graceful_decommission_timeout = 5
+     [(google.api.field_behavior) = REQUIRED];
+
+ // Required. Fraction of average YARN pending memory in the last cooldown
+ // period for which to add workers. A scale-up factor of 1.0 will result in
+ // scaling up so that there is no pending memory remaining after the update
+ // (more aggressive scaling). A scale-up factor closer to 0 will result in a
+ // smaller magnitude of scaling up (less aggressive scaling). See [How
+ // autoscaling
+ // works](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/autoscaling#how_autoscaling_works)
+ // for more information.
//
// Bounds: [0.0, 1.0].
double scale_up_factor = 1 [(google.api.field_behavior) = REQUIRED];

- // Required. Fraction of average pending memory in the last cooldown period
- // for which to remove workers. A scale-down factor of 1 will result in
+ // Required. Fraction of average YARN pending memory in the last cooldown
+ // period for which to remove workers. A scale-down factor of 1 will result in
// scaling down so that there is no available memory remaining after the
// update (more aggressive scaling). A scale-down factor of 0 disables
// removing workers, which can be beneficial for autoscaling a single job.
+ // See [How autoscaling
+ // works](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/autoscaling#how_autoscaling_works)
+ // for more information.
//
// Bounds: [0.0, 1.0].
double scale_down_factor = 2 [(google.api.field_behavior) = REQUIRED];
@@ -189,7 +206,8 @@ message BasicYarnAutoscalingConfig {
// on any recommended change.
//
// Bounds: [0.0, 1.0]. Default: 0.0.
- double scale_up_min_worker_fraction = 3 [(google.api.field_behavior) = OPTIONAL];
+ double scale_up_min_worker_fraction = 3
+     [(google.api.field_behavior) = OPTIONAL];

// Optional. Minimum scale-down threshold as a fraction of total cluster size
// before scaling occurs. For example, in a 20-worker cluster, a threshold of
@@ -198,7 +216,8 @@ message BasicYarnAutoscalingConfig {
// on any recommended change.
//
// Bounds: [0.0, 1.0]. Default: 0.0.
- double scale_down_min_worker_fraction = 4 [(google.api.field_behavior) = OPTIONAL];
+ double scale_down_min_worker_fraction = 4
+     [(google.api.field_behavior) = OPTIONAL];
}
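
To make the factor semantics concrete, here is a toy Python sketch of the documented scale-up rule. This is an illustration only, not Dataproc's actual algorithm, and the per-worker memory figure is a made-up assumption:

import math

def workers_to_add(pending_memory_mb, scale_up_factor,
                   memory_per_worker_mb, current_workers,
                   scale_up_min_worker_fraction=0.0):
    # Add enough workers to absorb scale_up_factor of the average
    # pending YARN memory observed over the cooldown period.
    delta = math.ceil(scale_up_factor * pending_memory_mb / memory_per_worker_mb)
    # Recommendations below the minimum worker fraction are ignored.
    if delta < scale_up_min_worker_fraction * current_workers:
        return 0
    return delta

# 100 GB pending and a factor of 0.5 targets 50 GB of new capacity:
# with an assumed ~13 GB of YARN memory per worker, that is 4 workers.
print(workers_to_add(100_000, 0.5, 13_000, current_workers=10))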

// Configuration for the size bounds of an instance group, including its
@@ -341,7 +360,8 @@ message ListAutoscalingPoliciesRequest {
// A response to a request to list autoscaling policies in a project.
message ListAutoscalingPoliciesResponse {
// Output only. Autoscaling policies list.
- repeated AutoscalingPolicy policies = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
+ repeated AutoscalingPolicy policies = 1
+     [(google.api.field_behavior) = OUTPUT_ONLY];

// Output only. This token is included in the response if there are more
// results to fetch.
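
For orientation, a minimal sketch of driving these RPCs through the generated Python client in this repo; the project, region, and policy values are placeholders:

from google.cloud import dataproc_v1beta2
from google.protobuf import duration_pb2

client = dataproc_v1beta2.AutoscalingPolicyServiceClient()

policy = dataproc_v1beta2.AutoscalingPolicy(
    id="my-policy",  # placeholder
    basic_algorithm=dataproc_v1beta2.BasicAutoscalingAlgorithm(
        yarn_config=dataproc_v1beta2.BasicYarnAutoscalingConfig(
            graceful_decommission_timeout=duration_pb2.Duration(seconds=600),
            scale_up_factor=0.5,
            scale_down_factor=1.0,
        ),
        cooldown_period=duration_pb2.Duration(seconds=120),
    ),
    worker_config=dataproc_v1beta2.InstanceGroupAutoscalingPolicyConfig(
        max_instances=10,
    ),
)
created = client.create_autoscaling_policy(
    parent="projects/my-project/locations/us-central1",  # placeholder
    policy=policy,
)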
45 changes: 43 additions & 2 deletions google/cloud/dataproc_v1beta2/proto/clusters.proto
@@ -171,6 +171,17 @@ message ClusterConfig {
// bucket](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)).
string config_bucket = 1 [(google.api.field_behavior) = OPTIONAL];

+ // Optional. A Cloud Storage bucket used to store ephemeral cluster and jobs data,
+ // such as Spark and MapReduce history files.
+ // If you do not specify a temp bucket,
+ // Dataproc will determine a Cloud Storage location (US,
+ // ASIA, or EU) for your cluster's temp bucket according to the
+ // Compute Engine zone where your cluster is deployed, and then create
+ // and manage this project-level, per-location bucket. The default bucket has
+ // a TTL of 90 days, but you can use any TTL (or none) if you specify a
+ // bucket.
+ string temp_bucket = 2 [(google.api.field_behavior) = OPTIONAL];
+
// Optional. The shared Compute Engine config settings for
// all instances in a cluster.
GceClusterConfig gce_cluster_config = 8 [(google.api.field_behavior) = OPTIONAL];
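
A short sketch of pinning the new field from the Python client; the bucket and name values are placeholders, and when `temp_bucket` is omitted Dataproc picks and manages one as described above:

from google.cloud import dataproc_v1beta2

cluster = dataproc_v1beta2.Cluster(
    project_id="my-project",    # placeholder
    cluster_name="my-cluster",  # placeholder
    config=dataproc_v1beta2.ClusterConfig(
        config_bucket="my-staging-bucket",  # staging data (placeholder)
        temp_bucket="my-temp-bucket",       # ephemeral job data (placeholder)
    ),
)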
@@ -330,7 +341,7 @@ message GceClusterConfig {
bool internal_ip_only = 7 [(google.api.field_behavior) = OPTIONAL];

// Optional. The [Dataproc service
- // account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_cloud_dataproc)
+ // account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_dataproc)
// (also see [VM Data Plane
// identity](https://cloud.google.com/dataproc/docs/concepts/iam/dataproc-principals#vm_service_account_data_plane_identity))
// used by Dataproc cluster VM instances to access Google Cloud Platform
@@ -374,6 +385,27 @@ message GceClusterConfig {
// The config settings for Compute Engine resources in
// an instance group, such as a master or worker group.
message InstanceGroupConfig {
+ // Controls the use of
+ // [preemptible instances]
+ // (https://cloud.google.com/compute/docs/instances/preemptible)
+ // within the group.
+ enum Preemptibility {
+   // Preemptibility is unspecified, the system will choose the
+   // appropriate setting for each instance group.
+   PREEMPTIBILITY_UNSPECIFIED = 0;
+
+   // Instances are non-preemptible.
+   //
+   // This option is allowed for all instance groups and is the only valid
+   // value for Master and Worker instance groups.
+   NON_PREEMPTIBLE = 1;
+
+   // Instances are preemptible.
+   //
+   // This option is allowed only for secondary worker groups.
+   PREEMPTIBLE = 2;
+ }
+
// Optional. The number of VM instances in the instance group.
// For master instance groups, must be set to 1.
int32 num_instances = 1 [(google.api.field_behavior) = OPTIONAL];
@@ -424,6 +456,15 @@ message InstanceGroupConfig {
// instances.
bool is_preemptible = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

+ // Optional. Specifies the preemptibility of the instance group.
+ //
+ // The default value for master and worker groups is
+ // `NON_PREEMPTIBLE`. This default cannot be changed.
+ //
+ // The default value for secondary instances is
+ // `PREEMPTIBLE`.
+ Preemptibility preemptibility = 10 [(google.api.field_behavior) = OPTIONAL];
+
// Output only. The config for Compute Engine Instance Group
// Manager that manages this group.
// This is only used for preemptible instance groups.
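
A sketch of opting secondary workers into preemptible VMs with the new field; the instance counts are placeholders:

from google.cloud import dataproc_v1beta2

Preemptibility = dataproc_v1beta2.InstanceGroupConfig.Preemptibility

config = dataproc_v1beta2.ClusterConfig(
    # Master and primary worker groups must remain NON_PREEMPTIBLE.
    master_config=dataproc_v1beta2.InstanceGroupConfig(num_instances=1),
    worker_config=dataproc_v1beta2.InstanceGroupConfig(num_instances=2),
    # Only secondary workers may be PREEMPTIBLE.
    secondary_worker_config=dataproc_v1beta2.InstanceGroupConfig(
        num_instances=4,
        preemptibility=Preemptibility.PREEMPTIBLE,
    ),
)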
@@ -685,7 +726,7 @@ message ClusterStatus {
message SoftwareConfig {
// Optional. The version of software inside the cluster. It must be one of the
// supported [Dataproc
- // Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_cloud_dataproc_versions),
+ // Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_dataproc_versions),
// such as "1.2" (including a subminor version, such as "1.2.29"), or the
// ["preview"
// version](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#other_versions).
29 changes: 15 additions & 14 deletions google/cloud/dataproc_v1beta2/proto/jobs.proto
@@ -224,12 +224,12 @@ message SparkJob {
// Spark driver and tasks.
repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

- // Optional. HCFS URIs of files to be copied to the working directory of
- // Spark drivers and distributed tasks. Useful for naively parallel tasks.
+ // Optional. HCFS URIs of files to be placed in the working directory of
+ // each executor. Useful for naively parallel tasks.
repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

- // Optional. HCFS URIs of archives to be extracted in the working directory
- // of Spark drivers and tasks. Supported file types:
+ // Optional. HCFS URIs of archives to be extracted into the working directory
+ // of each executor. Supported file types:
// .jar, .tar, .tar.gz, .tgz, and .zip.
repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
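
The rewording matters when staging job inputs: `file_uris` land in each executor's working directory and `archive_uris` are unpacked there. A sketch with placeholder URIs; the same doc change repeats for `PySparkJob` and `SparkRJob` below:

from google.cloud import dataproc_v1beta2

spark_job = dataproc_v1beta2.SparkJob(
    main_class="com.example.WordCount",              # placeholder
    jar_file_uris=["gs://my-bucket/wordcount.jar"],  # placeholder
    # Placed in the working directory of each executor.
    file_uris=["gs://my-bucket/lookup.csv"],
    # Extracted into the working directory of each executor.
    archive_uris=["gs://my-bucket/deps.zip"],
)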

@@ -265,11 +265,12 @@ message PySparkJob {
// Python driver and tasks.
repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

- // Optional. HCFS URIs of files to be copied to the working directory of
- // Python drivers and distributed tasks. Useful for naively parallel tasks.
+ // Optional. HCFS URIs of files to be placed in the working directory of
+ // each executor. Useful for naively parallel tasks.
repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

- // Optional. HCFS URIs of archives to be extracted in the working directory of
+ // Optional. HCFS URIs of archives to be extracted into the working directory
+ // of each executor. Supported file types:
// .jar, .tar, .tar.gz, .tgz, and .zip.
repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];

@@ -414,12 +415,12 @@ message SparkRJob {
// occur that causes an incorrect job submission.
repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];

- // Optional. HCFS URIs of files to be copied to the working directory of
- // R drivers and distributed tasks. Useful for naively parallel tasks.
+ // Optional. HCFS URIs of files to be placed in the working directory of
+ // each executor. Useful for naively parallel tasks.
repeated string file_uris = 3 [(google.api.field_behavior) = OPTIONAL];

- // Optional. HCFS URIs of archives to be extracted in the working directory of
- // Spark drivers and tasks. Supported file types:
+ // Optional. HCFS URIs of archives to be extracted into the working directory
+ // of each executor. Supported file types:
// .jar, .tar, .tar.gz, .tgz, and .zip.
repeated string archive_uris = 4 [(google.api.field_behavior) = OPTIONAL];

@@ -562,9 +563,9 @@ message JobStatus {

// Encapsulates the full scoping used to reference a job.
message JobReference {
- // Required. The ID of the Google Cloud Platform project that the job
- // belongs to.
- string project_id = 1 [(google.api.field_behavior) = REQUIRED];
+ // Optional. The ID of the Google Cloud Platform project that the job belongs to. If
+ // specified, must match the request project ID.
+ string project_id = 1 [(google.api.field_behavior) = OPTIONAL];

// Optional. The job ID, which must be unique within the project.
// The ID must contain only letters (a-z, A-Z), numbers (0-9),
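
With `project_id` now optional on the reference, a job can be submitted with only a `job_id` (or with no reference at all, letting the service generate one). A sketch against the v1beta2 job controller; all identifiers are placeholders:

from google.cloud import dataproc_v1beta2

client = dataproc_v1beta2.JobControllerClient()

job = dataproc_v1beta2.Job(
    # project_id is omitted here; if set, it must match the request's.
    reference=dataproc_v1beta2.JobReference(job_id="my-job-id"),
    placement=dataproc_v1beta2.JobPlacement(cluster_name="my-cluster"),
    spark_job=dataproc_v1beta2.SparkJob(main_class="com.example.WordCount"),
)
submitted = client.submit_job(
    project_id="my-project", region="us-central1", job=job
)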
9 changes: 3 additions & 6 deletions google/cloud/dataproc_v1beta2/proto/shared.proto
@@ -25,20 +25,17 @@ option java_package = "com.google.cloud.dataproc.v1beta2";

// Cluster components that can be activated.
enum Component {
- // Unspecified component.
+ // Unspecified component. Specifying this will cause Cluster creation to fail.
COMPONENT_UNSPECIFIED = 0;

// The Anaconda python distribution.
ANACONDA = 5;

- // Docker
- DOCKER = 13;
-
// The Druid query engine.
DRUID = 9;

- // Flink
- FLINK = 14;
+ // HBase.
+ HBASE = 11;

// The Hive Web HCatalog (the REST service for accessing HCatalog).
HIVE_WEBHCAT = 3;
Expand Down