Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(bigquery/storage/managedwriter): add state tracking #4407

Merged
127 changes: 127 additions & 0 deletions bigquery/storage/managedwriter/appendresult.go
@@ -0,0 +1,127 @@
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package managedwriter

import (
"context"

storagepb "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta2"
"google.golang.org/protobuf/proto"
"google.golang.org/protobuf/types/known/wrapperspb"
)

// NoOffset is a sentinel value for signalling we're not tracking
// stream offset (e.g. a default stream which allows simultaneous append streams).
const NoOffset int64 = -1

// AppendResult tracks the status of a single row of data.
type AppendResult struct {
	// rowData contains the serialized row data.
	rowData []byte

	// ready is closed (by markDone) once the append request carrying
	// this row has completed; consumers wait on it via Ready/GetResult.
	ready chan struct{}

	// if the encapsulating append failed, this will retain a reference to the error.
	err error

	// the stream offset assigned to this row on success (NoOffset when
	// the write did not track offsets).
	offset int64
}

// newAppendResult wraps a serialized row in a fresh AppendResult whose
// completion channel has not yet been signalled.
func newAppendResult(data []byte) *AppendResult {
	ar := &AppendResult{
		rowData: data,
		ready:   make(chan struct{}),
	}
	return ar
}

// Ready returns a channel that is closed once the append request
// carrying this row has completed.
func (ar *AppendResult) Ready() <-chan struct{} {
	return ar.ready
}

// GetResult returns the optional offset of this row, or the associated
codyoss marked this conversation as resolved.
Show resolved Hide resolved
// error.
func (ar *AppendResult) GetResult(ctx context.Context) (int64, error) {
select {
codyoss marked this conversation as resolved.
Show resolved Hide resolved
case <-ctx.Done():
return 0, ctx.Err()
case <-ar.Ready():
return ar.offset, ar.err
}
}

// pendingWrite tracks state for a set of rows that are part of a single
// append request.
type pendingWrite struct {
	// request is the wire-format append request; it is cleared by
	// markDone once the write is finalized.
	request *storagepb.AppendRowsRequest
	// results holds one AppendResult per serialized row in the request.
	results []*AppendResult

	// this is used by the flow controller.
	reqSize int
}

// newPendingWrite constructs the proto request and attaches references
// to the pending results for later consumption. The reason for this is
// that in the future, we may want to allow row batching to be managed by
// the server (e.g. for default/COMMITTED streams). For BUFFERED/PENDING
// streams, this should be managed by the user.
//
// Pass NoOffset as offset to omit explicit offset tracking from the
// request.
func newPendingWrite(appends [][]byte, offset int64) *pendingWrite {
	results := make([]*AppendResult, len(appends))
	for k, r := range appends {
		results[k] = newAppendResult(r)
	}
	pw := &pendingWrite{
		request: &storagepb.AppendRowsRequest{
			Rows: &storagepb.AppendRowsRequest_ProtoRows{
				ProtoRows: &storagepb.AppendRowsRequest_ProtoData{
					Rows: &storagepb.ProtoRows{
						SerializedRows: appends,
					},
				},
			},
		},
		results: results,
	}
	// Zero is a valid explicit offset (the first row of a stream), so only
	// negative values (i.e. the NoOffset sentinel) mean "no offset requested".
	if offset >= 0 {
		pw.request.Offset = &wrapperspb.Int64Value{Value: offset}
	}
	// We compute the size now for flow controller purposes, though
	// the actual request size may be slightly larger (e.g. the first
	// request in a new stream bears schema and stream id).
	pw.reqSize = proto.Size(pw.request)
	return pw
}

// markDone propagates finalization of an append request to associated
// AppendResult references.
func (pw *pendingWrite) markDone(startOffset int64, err error) {
curOffset := startOffset
for _, ar := range pw.results {
if err != nil {
ar.err = err
close(ar.ready)
continue
}

ar.offset = curOffset
// only advance curOffset if we were given a valid starting offset.
if startOffset >= 0 {
codyoss marked this conversation as resolved.
Show resolved Hide resolved
curOffset = curOffset + 1
}
close(ar.ready)
}
// Clear the reference to the request.
pw.request = nil
}
117 changes: 117 additions & 0 deletions bigquery/storage/managedwriter/appendresult_test.go
@@ -0,0 +1,117 @@
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package managedwriter

import (
"bytes"
"fmt"
"testing"
"time"
)

func TestAppendResult(t *testing.T) {
	wantRowBytes := []byte("rowdata")

	gotAR := newAppendResult(wantRowBytes)
	if !bytes.Equal(gotAR.rowData, wantRowBytes) {
		t.Errorf("mismatch in row data, got %q want %q", gotAR.rowData, wantRowBytes)
	}
	// A freshly constructed AppendResult must not signal completion yet.
	select {
	case <-gotAR.Ready():
		t.Errorf("got Ready() on a new AppendResult")
	default:
	}
}

func TestPendingWrite(t *testing.T) {
	wantRowData := [][]byte{
		[]byte("row1"),
		[]byte("row2"),
		[]byte("row3"),
	}

	var wantOffset int64 = 99

	// first, verify no offset behavior
	pending := newPendingWrite(wantRowData, NoOffset)
	if pending.request.GetOffset() != nil {
		t.Errorf("request should have no offset, but is present: %q", pending.request.GetOffset().GetValue())
	}
	pending.markDone(NoOffset, nil)
	for k, ar := range pending.results {
		if ar.offset != NoOffset {
			t.Errorf("mismatch on completed AppendResult(%d) without offset: got %d want %d", k, ar.offset, NoOffset)
		}
		if ar.err != nil {
			t.Errorf("mismatch in error on AppendResult(%d), got %v want nil", k, ar.err)
		}
	}

	// now, verify behavior with a valid offset
	pending = newPendingWrite(wantRowData, wantOffset)
	if pending.request.GetOffset() == nil {
		t.Errorf("offset not set, should be %d", wantOffset)
	}
	if gotOffset := pending.request.GetOffset().GetValue(); gotOffset != wantOffset {
		t.Errorf("offset mismatch, got %d want %d", gotOffset, wantOffset)
	}

	// check request shape
	gotRowCount := len(pending.request.GetProtoRows().GetRows().GetSerializedRows())
	if gotRowCount != len(wantRowData) {
		t.Errorf("pendingWrite request mismatch, got %d rows, want %d rows", gotRowCount, len(wantRowData))
	}

	// verify child AppendResults
	if len(pending.results) != len(wantRowData) {
		t.Errorf("mismatch in rows and append results. %d rows, %d AppendResults", len(wantRowData), len(pending.results))
	}
	for k, ar := range pending.results {
		gotData := ar.rowData
		if !bytes.Equal(gotData, wantRowData[k]) {
			t.Errorf("row %d mismatch in data: got %q want %q", k, gotData, wantRowData[k])
		}
		select {
		case <-ar.Ready():
			t.Errorf("got Ready() on incomplete AppendResult %d", k)
		case <-time.After(100 * time.Millisecond):
			continue
		}
	}

	// verify successful completion: offsets advance per-row from the
	// reported starting offset and no error is attached.
	reportedOffset := int64(101)
	pending.markDone(reportedOffset, nil)

	if pending.request != nil {
		t.Errorf("expected request to be cleared, is present: %#v", pending.request)
	}
	for k, ar := range pending.results {
		select {
		case <-ar.Ready():
		case <-time.After(100 * time.Millisecond):
			t.Errorf("possible blocking on completed AppendResult %d", k)
		}
		if ar.offset != reportedOffset+int64(k) {
			t.Errorf("mismatch on completed AppendResult offset: got %d want %d", ar.offset, reportedOffset+int64(k))
		}
		if ar.err != nil {
			t.Errorf("mismatch in errors, got %v want nil", ar.err)
		}
	}

	// verify failed completion: the error is propagated to every result
	// and all results are unblocked. Offsets are intentionally NOT
	// asserted here, as markDone does not assign them on error.
	pending = newPendingWrite(wantRowData, wantOffset)
	wantErr := fmt.Errorf("foo")
	pending.markDone(NoOffset, wantErr)

	if pending.request != nil {
		t.Errorf("expected request to be cleared, is present: %#v", pending.request)
	}
	for k, ar := range pending.results {
		gotData := ar.rowData
		if !bytes.Equal(gotData, wantRowData[k]) {
			t.Errorf("row %d mismatch in data: got %q want %q", k, gotData, wantRowData[k])
		}
		select {
		case <-ar.Ready():
		case <-time.After(100 * time.Millisecond):
			t.Errorf("possible blocking on completed AppendResult %d", k)
		}
		if ar.err != wantErr {
			t.Errorf("mismatch in errors, got %v want %v", ar.err, wantErr)
		}
	}
}