feat: updates for v1beta2 storage API #172

- Updated comments on BatchCommitWriteStreams
- Added support for new BigQuery types BIGNUMERIC and INTERVAL to TableSchema
- Added read rows schema in ReadRowsResponse
- Misc comment updates

Merged
merged 1 commit into from Apr 7, 2021
2 changes: 1 addition & 1 deletion google/cloud/bigquery_storage_v1beta2/proto/arrow.proto
@@ -1,4 +1,4 @@
// Copyright 2020 Google LLC
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
2 changes: 1 addition & 1 deletion google/cloud/bigquery_storage_v1beta2/proto/avro.proto
@@ -1,4 +1,4 @@
// Copyright 2020 Google LLC
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
5 changes: 2 additions & 3 deletions google/cloud/bigquery_storage_v1beta2/proto/protobuf.proto
@@ -1,4 +1,4 @@
// Copyright 2020 Google LLC
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -23,15 +23,14 @@ option java_multiple_files = true;
option java_outer_classname = "ProtoBufProto";
option java_package = "com.google.cloud.bigquery.storage.v1beta2";

// Protobuf schema is an API presentation the proto buffer schema.
// ProtoSchema describes the schema of the serialized protocol buffer data rows.
message ProtoSchema {
// Descriptor for input message. The descriptor has to be self contained,
// including all the nested types, excepted for proto buffer well known types
// (https://developers.google.com/protocol-buffers/docs/reference/google.protobuf).
google.protobuf.DescriptorProto proto_descriptor = 1;
}

// Protobuf rows.
message ProtoRows {
// A sequence of rows serialized as a Protocol Buffer.
//
100 changes: 64 additions & 36 deletions google/cloud/bigquery_storage_v1beta2/proto/storage.proto
@@ -1,4 +1,4 @@
// Copyright 2020 Google LLC
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -71,8 +71,7 @@ service BigQueryRead {
post: "/v1beta2/{read_session.table=projects/*/datasets/*/tables/*}"
body: "*"
};
option (google.api.method_signature) =
"parent,read_session,max_stream_count";
option (google.api.method_signature) = "parent,read_session,max_stream_count";
}

// Reads rows from the stream in the format prescribed by the ReadSession.
@@ -101,8 +100,7 @@ service BigQueryRead {
// original, primary, and residual, that original[0-j] = primary[0-j] and
// original[j-n] = residual[0-m] once the streams have been read to
// completion.
rpc SplitReadStream(SplitReadStreamRequest)
returns (SplitReadStreamResponse) {
rpc SplitReadStream(SplitReadStreamRequest) returns (SplitReadStreamResponse) {
option (google.api.http) = {
get: "/v1beta2/{name=projects/*/locations/*/sessions/*/streams/*}"
};
@@ -171,8 +169,7 @@ service BigQueryWrite {

// Finalize a write stream so that no new data can be appended to the
// stream. Finalize is not supported on the '_default' stream.
rpc FinalizeWriteStream(FinalizeWriteStreamRequest)
returns (FinalizeWriteStreamResponse) {
rpc FinalizeWriteStream(FinalizeWriteStreamRequest) returns (FinalizeWriteStreamResponse) {
option (google.api.http) = {
post: "/v1beta2/{name=projects/*/datasets/*/tables/*/streams/*}"
body: "*"
@@ -185,8 +182,7 @@ service BigQueryWrite {
// Streams must be finalized before commit and cannot be committed multiple
// times. Once a stream is committed, data in the stream becomes available
// for read operations.
rpc BatchCommitWriteStreams(BatchCommitWriteStreamsRequest)
returns (BatchCommitWriteStreamsResponse) {
rpc BatchCommitWriteStreams(BatchCommitWriteStreamsRequest) returns (BatchCommitWriteStreamsResponse) {
option (google.api.http) = {
get: "/v1beta2/{parent=projects/*/datasets/*/tables/*}"
};
@@ -303,6 +299,19 @@ message ReadRowsResponse {
// Throttling state. If unset, the latest response still describes
// the current throttling status.
ThrottleState throttle_state = 5;

// The schema for the read. If read_options.selected_fields is set, the
// schema may be different from the table schema as it will only contain
// the selected fields. This schema is equivalent to the one returned by
// CreateSession. This field is only populated in the first ReadRowsResponse
// RPC.
oneof schema {
// Output only. Avro schema.
AvroSchema avro_schema = 7 [(google.api.field_behavior) = OUTPUT_ONLY];

// Output only. Arrow schema.
ArrowSchema arrow_schema = 8 [(google.api.field_behavior) = OUTPUT_ONLY];
}
}

// Request message for `SplitReadStream`.
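To make the new `schema` oneof concrete, here is a minimal Python sketch of inspecting it on the client side; it assumes only that the caller already has an iterable of `ReadRowsResponse` messages (for example, the stream returned by a `BigQueryReadClient.read_rows()` call), and it follows the comment above by reading the schema from the first response only. Subsequent responses carry only row payloads.

```python
# Minimal sketch: `responses` is any iterable of v1beta2 ReadRowsResponse messages.
from typing import Iterable, Optional, Tuple

from google.cloud.bigquery_storage_v1beta2 import types


def first_response_schema(
    responses: Iterable[types.ReadRowsResponse],
) -> Tuple[Optional[str], object]:
    """Return ("arrow" | "avro" | None, schema) taken from the first response."""
    for response in responses:
        # Per the comment above, only the first ReadRowsResponse carries the schema.
        if response.arrow_schema.serialized_schema:
            return "arrow", response.arrow_schema
        if response.avro_schema.schema:
            return "avro", response.avro_schema
        return None, None
    return None, None
```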
@@ -342,7 +351,9 @@ message CreateWriteStreamRequest {
// of `projects/{project}/datasets/{dataset}/tables/{table}`.
string parent = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = { type: "bigquery.googleapis.com/Table" }
(google.api.resource_reference) = {
type: "bigquery.googleapis.com/Table"
}
];

// Required. Stream to be created.
@@ -360,9 +371,9 @@ message AppendRowsRequest {
ProtoRows rows = 2;
}

// Required. The stream that is the target of the append operation. This value
// must be specified for the initial request. If subsequent requests specify
// the stream name, it must equal to the value provided in the first request.
// Required. The stream that is the target of the append operation. This value must be
// specified for the initial request. If subsequent requests specify the
// stream name, it must be equal to the value provided in the first request.
// To write to the _default stream, populate this field with a string in the
// format `projects/{project}/datasets/{dataset}/tables/{table}/_default`.
string write_stream = 1 [
@@ -394,7 +405,7 @@ message AppendRowsRequest {

// Response message for `AppendRows`.
message AppendRowsResponse {
// A success append result.
// AppendResult is returned for successful append requests.
message AppendResult {
// The row offset at which the last append occurred. The offset will not be
// set if appending using default streams.
@@ -405,25 +416,32 @@ message AppendRowsResponse {
// Result if the append is successful.
AppendResult append_result = 1;

// Error in case of request failed. If set, it means rows are not accepted
// into the system. Users can retry or continue with other requests within
// the same connection.
// ALREADY_EXISTS: happens when offset is specified, it means the entire
// request is already appended, it is safe to ignore this error.
// OUT_OF_RANGE: happens when offset is specified, it means the specified
// offset is beyond the end of the stream.
// INVALID_ARGUMENT: error caused by malformed request or data.
// RESOURCE_EXHAUSTED: request rejected due to throttling. Only happens when
// append without offset.
// ABORTED: request processing is aborted because of prior failures, request
// can be retried if previous failure is fixed.
// INTERNAL: server side errors that can be retried.
// Error returned when problems were encountered. If present,
// it indicates rows were not accepted into the system.
// Users can retry or continue with other append requests within the
// same connection.
//
// Additional information about error signalling:
//
// ALREADY_EXISTS: Happens when an append specified an offset, and the
// backend already has received data at this offset. Typically encountered
// in retry scenarios, and can be ignored.
//
// OUT_OF_RANGE: Returned when the specified offset in the stream is beyond
// the current end of the stream.
//
// INVALID_ARGUMENT: Indicates a malformed request or data.
//
// ABORTED: Request processing is aborted because of prior failures. The
// request can be retried if previous failure is addressed.
//
// INTERNAL: Indicates server side error(s) that can be retried.
google.rpc.Status error = 2;
}

// If backend detects a schema update, pass it to user so that user can
// use it to input new type of message. It will be empty when there is no
// schema updates.
// use it to input new type of message. It will be empty when no schema
// updates have occurred.
TableSchema updated_schema = 3;
}

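The `error` field is a `google.rpc.Status`, so a client can branch on its numeric code. Below is a minimal sketch of that decision, assuming `response` is an `AppendRowsResponse` from an AppendRows call and using the standard `google.rpc.code_pb2` constants to mirror the codes documented above.

```python
# Minimal sketch: maps the documented status codes onto retry decisions.
from google.rpc import code_pb2  # part of googleapis-common-protos


def classify_append_response(response) -> str:
    code = response.error.code  # 0 (OK) when no error was returned
    if code in (code_pb2.OK, code_pb2.ALREADY_EXISTS):
        # ALREADY_EXISTS: the offset was already written, typically on a retry.
        return "ok"
    if code in (code_pb2.ABORTED, code_pb2.INTERNAL):
        return "retry"
    # INVALID_ARGUMENT, OUT_OF_RANGE, and anything else: do not blindly retry.
    return "fail"
```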
@@ -441,9 +459,11 @@ message GetWriteStreamRequest {

// Request message for `BatchCommitWriteStreams`.
message BatchCommitWriteStreamsRequest {
// Required. Parent table that all the streams should belong to, in the form
// of `projects/{project}/datasets/{dataset}/tables/{table}`.
string parent = 1 [(google.api.field_behavior) = REQUIRED];
// Required. Parent table that all the streams should belong to, in the form of
// `projects/{project}/datasets/{dataset}/tables/{table}`.
string parent = 1 [
(google.api.field_behavior) = REQUIRED
];

// Required. The group of streams that will be committed atomically.
repeated string write_streams = 2 [(google.api.field_behavior) = REQUIRED];
@@ -452,11 +472,15 @@ message BatchCommitWriteStreamsRequest {
// Response message for `BatchCommitWriteStreams`.
message BatchCommitWriteStreamsResponse {
// The time at which streams were committed in microseconds granularity.
// This field will only exist when there is no stream errors.
// This field will only exist when there are no stream errors.
// **Note** if this field is not set, it means the commit was not successful.
google.protobuf.Timestamp commit_time = 1;

// Stream level error if commit failed. Only streams with error will be in
// the list.
// If empty, there is no error and all streams are committed successfully.
// If non-empty, certain streams have errors and ZERO streams are committed
// due to the atomicity guarantee.
repeated StorageError stream_errors = 2;
}

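Given these semantics, checking the outcome of a commit reduces to testing `stream_errors` and `commit_time`. The sketch below assumes the v1beta2 `BigQueryWriteClient`; the project, dataset, table, and stream names are placeholders for previously created and finalized streams.

```python
# Minimal sketch, assuming the v1beta2 BigQueryWriteClient; names are placeholders.
from google.cloud import bigquery_storage_v1beta2

write_client = bigquery_storage_v1beta2.BigQueryWriteClient()
table_path = "projects/p/datasets/d/tables/t"

response = write_client.batch_commit_write_streams(
    request={
        "parent": table_path,
        "write_streams": [f"{table_path}/streams/s1"],
    }
)

if response.stream_errors:
    # Atomic commit: any stream error means zero streams were committed.
    for stream_error in response.stream_errors:
        print(stream_error.code, stream_error.entity, stream_error.error_message)
else:
    print("Committed at", response.commit_time)
```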
@@ -500,8 +524,9 @@ message FlushRowsResponse {
}

// Structured custom BigQuery Storage error message. The error can be attached
// as error details in the returned rpc Status. User can use the info to process
// errors in a structural way, rather than having to parse error messages.
// as error details in the returned rpc Status. In particular, the use of error
// codes allows more structured error handling, and reduces the need to evaluate
// unstructured error text strings.
message StorageError {
// Error code for `StorageError`.
enum StorageErrorCode {
@@ -522,9 +547,12 @@ message StorageError {
INVALID_STREAM_TYPE = 4;

// Invalid Stream state.
// For example, you try to commit a stream that is not fianlized or is
// For example, you try to commit a stream that is not finalized or is
// garbaged.
INVALID_STREAM_STATE = 5;

// Stream is finalized.
STREAM_FINALIZED = 6;
}

// BigQuery Storage specific error code.
4 changes: 3 additions & 1 deletion google/cloud/bigquery_storage_v1beta2/proto/stream.proto
@@ -1,4 +1,4 @@
// Copyright 2020 Google LLC
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -74,6 +74,8 @@ message ReadSession {
// "nullable_field is not NULL"
// "st_equals(geo_field, st_geofromtext("POINT(2, 2)"))"
// "numeric_field BETWEEN 1.0 AND 5.0"
//
// Restricted to a maximum length of 1 MB.
string row_restriction = 2;

// Optional. Options specific to the Apache Arrow output format.
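As an illustration of where `row_restriction` and `selected_fields` fit, here is a minimal sketch of creating a read session with the v1beta2 client; the project and table IDs are placeholders. With `selected_fields` set, the schema returned in the first `ReadRowsResponse` covers only those fields, and the restriction string must stay under the 1 MB limit noted above.

```python
# Minimal sketch, assuming the v1beta2 BigQueryReadClient; IDs are placeholders.
from google.cloud import bigquery_storage_v1beta2
from google.cloud.bigquery_storage_v1beta2 import types

client = bigquery_storage_v1beta2.BigQueryReadClient()

requested_session = types.ReadSession(
    table="projects/my-project/datasets/my_dataset/tables/my_table",
    data_format=types.DataFormat.ARROW,
    read_options=types.ReadSession.TableReadOptions(
        selected_fields=["numeric_field", "geo_field"],
        # Must stay under the documented 1 MB limit.
        row_restriction="numeric_field BETWEEN 1.0 AND 5.0",
    ),
)

session = client.create_read_session(
    parent="projects/my-project",
    read_session=requested_session,
    max_stream_count=1,
)
```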
8 changes: 7 additions & 1 deletion google/cloud/bigquery_storage_v1beta2/proto/table.proto
@@ -1,4 +1,4 @@
// Copyright 2020 Google LLC
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -70,6 +70,12 @@ message TableFieldSchema {

// Numeric value
NUMERIC = 12;

// BigNumeric value
BIGNUMERIC = 13;

// Interval
INTERVAL = 14;
}

enum Mode {
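For reference, the two new values surface in the generated Python types roughly as below; this assumes the nested enum is exposed as `TableFieldSchema.Type` (the enum's name is not visible in this hunk), and the numeric values 13 and 14 come from the proto above.

```python
# Minimal sketch: the enum container name `Type` is assumed from the generated types.
from google.cloud.bigquery_storage_v1beta2 import types

FieldType = types.TableFieldSchema.Type
print(FieldType.BIGNUMERIC)   # new value, 13 in the proto above
print(FieldType.INTERVAL)     # new value, 14 in the proto above
print(FieldType(13).name)     # -> "BIGNUMERIC"
```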
6 changes: 3 additions & 3 deletions google/cloud/bigquery_storage_v1beta2/types/protobuf.py
@@ -28,8 +28,8 @@


class ProtoSchema(proto.Message):
r"""Protobuf schema is an API presentation the proto buffer
schema.
r"""ProtoSchema describes the schema of the serialized protocol
buffer data rows.

Attributes:
proto_descriptor (google.protobuf.descriptor_pb2.DescriptorProto):
@@ -47,7 +47,7 @@ class ProtoSchema(proto.Message):


class ProtoRows(proto.Message):
r"""Protobuf rows.
r"""

Attributes:
serialized_rows (Sequence[bytes]):
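To tie the `ProtoSchema` and `ProtoRows` docstrings back to their use in `AppendRows`, here is a minimal sketch of building both messages; `row_pb2.Row` is a hypothetical user-compiled protobuf message whose descriptor is self-contained, as the `proto_descriptor` comment requires.

```python
# Minimal sketch: `row_pb2` is a hypothetical module generated by protoc from a
# user-defined Row message; everything else uses the v1beta2 types shown above.
from google.protobuf import descriptor_pb2

from google.cloud.bigquery_storage_v1beta2 import types

import row_pb2  # hypothetical generated module

# ProtoSchema: a self-contained DescriptorProto describing the row message.
proto_descriptor = descriptor_pb2.DescriptorProto()
row_pb2.Row.DESCRIPTOR.CopyToProto(proto_descriptor)
proto_schema = types.ProtoSchema()
proto_schema.proto_descriptor = proto_descriptor

# ProtoRows: one serialized protocol buffer per row.
rows = [row_pb2.Row(name="a"), row_pb2.Row(name="b")]
proto_rows = types.ProtoRows(
    serialized_rows=[row.SerializeToString() for row in rows],
)
```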