Skip to content

Commit

Permalink
feat: Add ZSTD compression as an option for Arrow (#197)
Browse files Browse the repository at this point in the history
Committer: @emkornfield
PiperOrigin-RevId: 374220891

Source-Author: Google APIs <noreply@google.com>
Source-Date: Mon May 17 10:03:14 2021 -0700
Source-Repo: googleapis/googleapis
Source-Sha: 23efea9fc7bedfe53b24295ed84b5f873606edcb
Source-Link: googleapis/googleapis@23efea9
  • Loading branch information
yoshi-automation committed May 18, 2021
1 parent 0fe6484 commit f941446
Show file tree
Hide file tree
Showing 6 changed files with 21 additions and 20 deletions.
3 changes: 3 additions & 0 deletions google/cloud/bigquery_storage_v1/proto/arrow.proto
Expand Up @@ -52,6 +52,9 @@ message ArrowSerializationOptions {

// LZ4 Frame (https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md)
LZ4_FRAME = 1;

// Zstandard compression.
ZSTD = 2;
}

// The compression codec to use for Arrow buffers in serialized record
Expand Down
6 changes: 2 additions & 4 deletions google/cloud/bigquery_storage_v1/proto/storage.proto
Expand Up @@ -69,8 +69,7 @@ service BigQueryRead {
post: "/v1/{read_session.table=projects/*/datasets/*/tables/*}"
body: "*"
};
option (google.api.method_signature) =
"parent,read_session,max_stream_count";
option (google.api.method_signature) = "parent,read_session,max_stream_count";
}

// Reads rows from the stream in the format prescribed by the ReadSession.
Expand Down Expand Up @@ -99,8 +98,7 @@ service BigQueryRead {
// original, primary, and residual, that original[0-j] = primary[0-j] and
// original[j-n] = residual[0-m] once the streams have been read to
// completion.
rpc SplitReadStream(SplitReadStreamRequest)
returns (SplitReadStreamResponse) {
rpc SplitReadStream(SplitReadStreamRequest) returns (SplitReadStreamResponse) {
option (google.api.http) = {
get: "/v1/{name=projects/*/locations/*/sessions/*/streams/*}"
};
Expand Down
22 changes: 10 additions & 12 deletions google/cloud/bigquery_storage_v1/proto/stream.proto
Expand Up @@ -75,23 +75,20 @@ message ReadSession {
// Restricted to a maximum length of 1 MB.
string row_restriction = 2;

// Optional. Options specific to the Apache Arrow output format.
oneof output_format_serialization_options {
ArrowSerializationOptions arrow_serialization_options = 3
[(google.api.field_behavior) = OPTIONAL];
// Optional. Options specific to the Apache Arrow output format.
ArrowSerializationOptions arrow_serialization_options = 3 [(google.api.field_behavior) = OPTIONAL];
}
}

// Output only. Unique identifier for the session, in the form
// `projects/{project_id}/locations/{location}/sessions/{session_id}`.
string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

// Output only. Time at which the session becomes invalid. After this time,
// subsequent requests to read this Session will return errors. The
// expire_time is automatically assigned and currently cannot be specified or
// updated.
google.protobuf.Timestamp expire_time = 2
[(google.api.field_behavior) = OUTPUT_ONLY];
// Output only. Time at which the session becomes invalid. After this time, subsequent
// requests to read this Session will return errors. The expire_time is
// automatically assigned and currently cannot be specified or updated.
google.protobuf.Timestamp expire_time = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

// Immutable. Data format of the output data.
DataFormat data_format = 3 [(google.api.field_behavior) = IMMUTABLE];
Expand All @@ -111,11 +108,12 @@ message ReadSession {
// `projects/{project_id}/datasets/{dataset_id}/tables/{table_id}`
string table = 6 [
(google.api.field_behavior) = IMMUTABLE,
(google.api.resource_reference) = { type: "bigquery.googleapis.com/Table" }
(google.api.resource_reference) = {
type: "bigquery.googleapis.com/Table"
}
];

// Optional. Any modifiers which are applied when reading from the specified
// table.
// Optional. Any modifiers which are applied when reading from the specified table.
TableModifiers table_modifiers = 7 [(google.api.field_behavior) = OPTIONAL];

// Optional. Read options for this session (e.g. column selection, filters).
Expand Down
1 change: 1 addition & 0 deletions google/cloud/bigquery_storage_v1/types/arrow.py
Expand Up @@ -63,6 +63,7 @@ class CompressionCodec(proto.Enum):
r"""Compression codecs supported by Arrow."""
COMPRESSION_UNSPECIFIED = 0
LZ4_FRAME = 1
ZSTD = 2

buffer_compression = proto.Field(proto.ENUM, number=2, enum=CompressionCodec,)

Expand Down
3 changes: 2 additions & 1 deletion google/cloud/bigquery_storage_v1/types/stream.py
Expand Up @@ -102,7 +102,8 @@ class TableReadOptions(proto.Message):
Restricted to a maximum length of 1 MB.
arrow_serialization_options (google.cloud.bigquery_storage_v1.types.ArrowSerializationOptions):
Optional. Options specific to the Apache
Arrow output format.
"""

selected_fields = proto.RepeatedField(proto.STRING, number=1,)
Expand Down
6 changes: 3 additions & 3 deletions synth.metadata
Expand Up @@ -4,15 +4,15 @@
"git": {
"name": ".",
"remote": "https://github.com/googleapis/python-bigquery-storage.git",
"sha": "ceae220a9fa5daa03da7907ae001d2689a7fbdcb"
"sha": "0fe648449715c0591c64a2013330ecba9d125fa1"
}
},
{
"git": {
"name": "googleapis",
"remote": "https://github.com/googleapis/googleapis.git",
"sha": "7e1b14e6c7a9ab96d2db7e4a131981f162446d34",
"internalRef": "373649163"
"sha": "23efea9fc7bedfe53b24295ed84b5f873606edcb",
"internalRef": "374220891"
}
}
],
Expand Down

0 comments on commit f941446

Please sign in to comment.