Skip to content
This repository has been archived by the owner on Nov 29, 2023. It is now read-only.

feat: add support for temp_bucket, endpoint_config in clusters; add preemptibility for instance group configs #60

Merged
merged 2 commits into from Jul 31, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
26 changes: 25 additions & 1 deletion google/cloud/dataproc_v1/gapic/enums.py
Expand Up @@ -24,7 +24,7 @@ class Component(enum.IntEnum):
Cluster components that can be activated.

Attributes:
COMPONENT_UNSPECIFIED (int): Unspecified component.
COMPONENT_UNSPECIFIED (int): Unspecified component. Specifying this will cause Cluster creation to fail.
ANACONDA (int): The Anaconda python distribution.
HIVE_WEBHCAT (int): The Hive Web HCatalog (the REST service for accessing HCatalog).
JUPYTER (int): The Jupyter Notebook.
Expand Down Expand Up @@ -103,6 +103,30 @@ class Substate(enum.IntEnum):
STALE_STATUS = 2


class InstanceGroupConfig(object):
    class Preemptibility(enum.IntEnum):
        """
        Controls the use of [preemptible instances]
        (https://cloud.google.com/compute/docs/instances/preemptible) within the
        group.

        Attributes:
          PREEMPTIBILITY_UNSPECIFIED (int): Preemptibility is unspecified; the
          system chooses the appropriate setting for each instance group.
          NON_PREEMPTIBLE (int): Instances are non-preemptible.

          Allowed for every instance group, and the only valid value for
          Master and Worker instance groups.
          PREEMPTIBLE (int): Instances are preemptible.

          Allowed only for secondary worker groups.
        """

        PREEMPTIBILITY_UNSPECIFIED = 0
        NON_PREEMPTIBLE = 1
        PREEMPTIBLE = 2


class JobStatus(object):
class State(enum.IntEnum):
"""
Expand Down
10 changes: 8 additions & 2 deletions google/cloud/dataproc_v1/proto/autoscaling_policies.proto
Expand Up @@ -164,20 +164,26 @@ message BasicYarnAutoscalingConfig {
// Bounds: [0s, 1d].
google.protobuf.Duration graceful_decommission_timeout = 5 [(google.api.field_behavior) = REQUIRED];

// Required. Fraction of average pending memory in the last cooldown period
// Required. Fraction of average YARN pending memory in the last cooldown period
// for which to add workers. A scale-up factor of 1.0 will result in scaling
// up so that there is no pending memory remaining after the update (more
// aggressive scaling). A scale-up factor closer to 0 will result in a smaller
// magnitude of scaling up (less aggressive scaling).
// See [How autoscaling
// works](/dataproc/docs/concepts/configuring-clusters/autoscaling#how_autoscaling_works)
// for more information.
//
// Bounds: [0.0, 1.0].
double scale_up_factor = 1 [(google.api.field_behavior) = REQUIRED];

// Required. Fraction of average pending memory in the last cooldown period
// Required. Fraction of average YARN pending memory in the last cooldown period
// for which to remove workers. A scale-down factor of 1 will result in
// scaling down so that there is no available memory remaining after the
// update (more aggressive scaling). A scale-down factor of 0 disables
// removing workers, which can be beneficial for autoscaling a single job.
// See [How autoscaling
// works](/dataproc/docs/concepts/configuring-clusters/autoscaling#how_autoscaling_works)
// for more information.
//
// Bounds: [0.0, 1.0].
double scale_down_factor = 2 [(google.api.field_behavior) = REQUIRED];
Expand Down
15 changes: 10 additions & 5 deletions google/cloud/dataproc_v1/proto/autoscaling_policies_pb2.py

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

59 changes: 57 additions & 2 deletions google/cloud/dataproc_v1/proto/clusters.proto
Expand Up @@ -170,6 +170,17 @@ message ClusterConfig {
// bucket](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)).
string config_bucket = 1 [(google.api.field_behavior) = OPTIONAL];

// Optional. A Cloud Storage bucket used to store ephemeral cluster and jobs data,
// such as Spark and MapReduce history files.
// If you do not specify a temp bucket,
// Dataproc will determine a Cloud Storage location (US,
// ASIA, or EU) for your cluster's temp bucket according to the
// Compute Engine zone where your cluster is deployed, and then create
// and manage this project-level, per-location bucket. The default bucket has
// a TTL of 90 days, but you can use any TTL (or none) if you specify a
// bucket.
string temp_bucket = 2 [(google.api.field_behavior) = OPTIONAL];

// Optional. The shared Compute Engine config settings for
// all instances in a cluster.
GceClusterConfig gce_cluster_config = 8 [(google.api.field_behavior) = OPTIONAL];
Expand Down Expand Up @@ -216,6 +227,20 @@ message ClusterConfig {

// Optional. Lifecycle setting for the cluster.
LifecycleConfig lifecycle_config = 17 [(google.api.field_behavior) = OPTIONAL];

// Optional. Port/endpoint configuration for this cluster
EndpointConfig endpoint_config = 19 [(google.api.field_behavior) = OPTIONAL];
}

// Endpoint configuration for a cluster: controls external HTTP access to
// cluster ports and reports the resulting port-to-URL mapping.
message EndpointConfig {
// Output only. The map of port descriptions to URLs. Will only be populated
// if enable_http_port_access is true.
map<string, string> http_ports = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

// Optional. If true, enable HTTP access to specific ports on the cluster
// from external sources. Defaults to false.
bool enable_http_port_access = 2 [(google.api.field_behavior) = OPTIONAL];
}

// Autoscaling Policy config associated with the cluster.
Expand Down Expand Up @@ -288,7 +313,7 @@ message GceClusterConfig {
bool internal_ip_only = 7 [(google.api.field_behavior) = OPTIONAL];

// Optional. The [Dataproc service
// account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_cloud_dataproc)
// account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_dataproc)
// (also see [VM Data Plane
// identity](https://cloud.google.com/dataproc/docs/concepts/iam/dataproc-principals#vm_service_account_data_plane_identity))
// used by Dataproc cluster VM instances to access Google Cloud Platform
Expand Down Expand Up @@ -332,6 +357,27 @@ message GceClusterConfig {
// The config settings for Compute Engine resources in
// an instance group, such as a master or worker group.
message InstanceGroupConfig {
// Controls the use of
// [preemptible instances]
// (https://cloud.google.com/compute/docs/instances/preemptible)
// within the group.
enum Preemptibility {
// Preemptibility is unspecified, the system will choose the
// appropriate setting for each instance group.
PREEMPTIBILITY_UNSPECIFIED = 0;

// Instances are non-preemptible.
//
// This option is allowed for all instance groups and is the only valid
// value for Master and Worker instance groups. It is also the default
// for master and worker groups, and that default cannot be changed.
NON_PREEMPTIBLE = 1;

// Instances are preemptible.
//
// This option is allowed only for secondary worker groups, where it is
// the default.
PREEMPTIBLE = 2;
}

// Optional. The number of VM instances in the instance group.
// For master instance groups, must be set to 1.
int32 num_instances = 1 [(google.api.field_behavior) = OPTIONAL];
Expand Down Expand Up @@ -382,6 +428,15 @@ message InstanceGroupConfig {
// instances.
bool is_preemptible = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

// Optional. Specifies the preemptibility of the instance group.
//
// The default value for master and worker groups is
// `NON_PREEMPTIBLE`. This default cannot be changed.
//
// The default value for secondary instances is
// `PREEMPTIBLE`.
Preemptibility preemptibility = 10 [(google.api.field_behavior) = OPTIONAL];

// Output only. The config for Compute Engine Instance Group
// Manager that manages this group.
// This is only used for preemptible instance groups.
Expand Down Expand Up @@ -608,7 +663,7 @@ message KerberosConfig {
message SoftwareConfig {
// Optional. The version of software inside the cluster. It must be one of the
// supported [Dataproc
// Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_cloud_dataproc_versions),
// Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_dataproc_versions),
// such as "1.2" (including a subminor version, such as "1.2.29"), or the
// ["preview"
// version](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#other_versions).
Expand Down