Skip to content

Commit

Permalink
[exporterhelper] Add default batching for OTLP data type (#9738)
Browse files Browse the repository at this point in the history
Introduce default batching functionality based on the internal data type
(pdata). This makes the exporter batching capability available to the
regular exporter helpers without using custom requests.

Updates #8122
  • Loading branch information
dmitryax committed Mar 27, 2024
1 parent c97d1e5 commit 4b5d68e
Show file tree
Hide file tree
Showing 13 changed files with 1,124 additions and 19 deletions.
28 changes: 28 additions & 0 deletions .chloggen/batch-exporter-helper.yaml
@@ -0,0 +1,28 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. otlpreceiver)
component: exporterhelper

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Make the `WithBatcher` option available for regular exporter helpers based on OTLP data type.

# One or more tracking issues or pull requests related to the change
issues: [8122]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext: |
Now `WithBatcher` can be used with both regular exporter helper (e.g. NewTracesExporter) and the request-based exporter
helper (e.g. NewTracesRequestExporter). The request-based exporter helpers require `WithRequestBatchFuncs` option
providing batching functions.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [api]
17 changes: 10 additions & 7 deletions exporter/exporterhelper/batch_sender.go
Expand Up @@ -45,14 +45,17 @@ type batchSender struct {
}

// newBatchSender returns a new batch consumer component.
func newBatchSender(cfg exporterbatcher.Config, set exporter.CreateSettings) *batchSender {
func newBatchSender(cfg exporterbatcher.Config, set exporter.CreateSettings,
mf exporterbatcher.BatchMergeFunc[Request], msf exporterbatcher.BatchMergeSplitFunc[Request]) *batchSender {
bs := &batchSender{
activeBatch: newEmptyBatch(),
cfg: cfg,
logger: set.Logger,
shutdownCh: make(chan struct{}),
stopped: &atomic.Bool{},
resetTimerCh: make(chan struct{}),
activeBatch: newEmptyBatch(),
cfg: cfg,
logger: set.Logger,
mergeFunc: mf,
mergeSplitFunc: msf,
shutdownCh: make(chan struct{}),
stopped: &atomic.Bool{},
resetTimerCh: make(chan struct{}),
}
return bs
}
Expand Down
45 changes: 45 additions & 0 deletions exporter/exporterhelper/batch_sender_test.go
Expand Up @@ -394,6 +394,51 @@ func TestBatchSender_DrainActiveRequests(t *testing.T) {
assert.Equal(t, uint64(3), sink.itemsCount.Load())
}

func TestBatchSender_WithBatcherOption(t *testing.T) {
tests := []struct {
name string
opts []Option
expectedErr bool
}{
{
name: "no_funcs_set",
opts: []Option{WithBatcher(exporterbatcher.NewDefaultConfig())},
expectedErr: true,
},
{
name: "funcs_set_internally",
opts: []Option{withBatchFuncs(fakeBatchMergeFunc, fakeBatchMergeSplitFunc), WithBatcher(exporterbatcher.NewDefaultConfig())},
expectedErr: false,
},
{
name: "funcs_set_twice",
opts: []Option{
withBatchFuncs(fakeBatchMergeFunc, fakeBatchMergeSplitFunc),
WithBatcher(exporterbatcher.NewDefaultConfig(), WithRequestBatchFuncs(fakeBatchMergeFunc,
fakeBatchMergeSplitFunc)),
},
expectedErr: true,
},
{
name: "nil_funcs",
opts: []Option{WithBatcher(exporterbatcher.NewDefaultConfig(), WithRequestBatchFuncs(nil, nil))},
expectedErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
be, err := newBaseExporter(defaultSettings, defaultType, newNoopObsrepSender, tt.opts...)
if tt.expectedErr {
assert.Nil(t, be)
assert.Error(t, err)
} else {
assert.NotNil(t, be)
assert.NoError(t, err)
}
})
}
}

func queueBatchExporter(t *testing.T, batchOption Option) *baseExporter {
be, err := newBaseExporter(defaultSettings, defaultType, newNoopObsrepSender, batchOption,
WithRequestQueue(exporterqueue.NewDefaultConfig(), exporterqueue.NewMemoryQueueFactory[Request]()))
Expand Down
42 changes: 33 additions & 9 deletions exporter/exporterhelper/common.go
Expand Up @@ -148,27 +148,35 @@ func WithCapabilities(capabilities consumer.Capabilities) Option {
}

// BatcherOption apply changes to batcher sender.
type BatcherOption func(*batchSender)
type BatcherOption func(*batchSender) error

// WithRequestBatchFuncs sets the functions for merging and splitting batches for an exporter built for custom request types.
func WithRequestBatchFuncs(mf exporterbatcher.BatchMergeFunc[Request], msf exporterbatcher.BatchMergeSplitFunc[Request]) BatcherOption {
return func(bs *batchSender) {
return func(bs *batchSender) error {
if mf == nil || msf == nil {
return fmt.Errorf("WithRequestBatchFuncs must be provided with non-nil functions")
}
if bs.mergeFunc != nil || bs.mergeSplitFunc != nil {
return fmt.Errorf("WithRequestBatchFuncs can only be used once with request-based exporters")
}
bs.mergeFunc = mf
bs.mergeSplitFunc = msf
return nil
}
}

// WithBatcher enables batching for an exporter based on custom request types.
// For now, it can be used only with the New[Traces|Metrics|Logs]RequestExporter exporter helpers and
// WithRequestBatchFuncs provided.
// TODO: Add OTLP-based batch functions applied by default so it can be used with New[Traces|Metrics|Logs]Exporter exporter helpers.
// This API is at the early stage of development and may change without backward compatibility
// until https://github.com/open-telemetry/opentelemetry-collector/issues/8122 is resolved.
func WithBatcher(cfg exporterbatcher.Config, opts ...BatcherOption) Option {
return func(o *baseExporter) error {
bs := newBatchSender(cfg, o.set)
bs := newBatchSender(cfg, o.set, o.batchMergeFunc, o.batchMergeSplitfunc)
for _, opt := range opts {
opt(bs)
if err := opt(bs); err != nil {
return err
}
}
if bs.mergeFunc == nil || bs.mergeSplitFunc == nil {
return fmt.Errorf("WithRequestBatchFuncs must be provided for the batcher applied to the request-based exporters")
Expand Down Expand Up @@ -196,14 +204,28 @@ func withUnmarshaler(unmarshaler exporterqueue.Unmarshaler[Request]) Option {
}
}

// withBatchFuncs is used to set the functions for merging and splitting batches for OTLP-based exporters.
// It must be provided as the first option when creating a new exporter helper.
func withBatchFuncs(mf exporterbatcher.BatchMergeFunc[Request], msf exporterbatcher.BatchMergeSplitFunc[Request]) Option {
return func(o *baseExporter) error {
o.batchMergeFunc = mf
o.batchMergeSplitfunc = msf
return nil
}
}

// baseExporter contains common fields between different exporter types.
type baseExporter struct {
component.StartFunc
component.ShutdownFunc

signal component.DataType

batchMergeFunc exporterbatcher.BatchMergeFunc[Request]
batchMergeSplitfunc exporterbatcher.BatchMergeSplitFunc[Request]

marshaler exporterqueue.Marshaler[Request]
unmarshaler exporterqueue.Unmarshaler[Request]
signal component.DataType

set exporter.CreateSettings
obsrep *ObsReport
Expand Down Expand Up @@ -251,11 +273,13 @@ func newBaseExporter(set exporter.CreateSettings, signal component.DataType, osf

be.connectSenders()

// If queue sender is enabled assign to the batch sender the same number of workers.
if qs, ok := be.queueSender.(*queueSender); ok {
if bs, ok := be.batchSender.(*batchSender); ok {
if bs, ok := be.batchSender.(*batchSender); ok {
// If queue sender is enabled assign to the batch sender the same number of workers.
if qs, ok := be.queueSender.(*queueSender); ok {
bs.concurrencyLimit = uint64(qs.numConsumers)
}
// Batcher sender mutates the data.
be.consumerOptions = append(be.consumerOptions, consumer.WithCapabilities(consumer.Capabilities{MutatesData: true}))
}

return be, nil
Expand Down
5 changes: 4 additions & 1 deletion exporter/exporterhelper/logs.go
Expand Up @@ -82,7 +82,10 @@ func NewLogsExporter(
if pusher == nil {
return nil, errNilPushLogsData
}
logsOpts := []Option{withMarshaler(logsRequestMarshaler), withUnmarshaler(newLogsRequestUnmarshalerFunc(pusher))}
logsOpts := []Option{
withMarshaler(logsRequestMarshaler), withUnmarshaler(newLogsRequestUnmarshalerFunc(pusher)),
withBatchFuncs(mergeLogs, mergeSplitLogs),
}
return NewLogsRequestExporter(ctx, set, requestFromLogs(pusher), append(logsOpts, options...)...)
}

Expand Down
137 changes: 137 additions & 0 deletions exporter/exporterhelper/logs_batch.go
@@ -0,0 +1,137 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

package exporterhelper // import "go.opentelemetry.io/collector/exporter/exporterhelper"

import (
"context"
"errors"

"go.opentelemetry.io/collector/exporter/exporterbatcher"
"go.opentelemetry.io/collector/pdata/plog"
)

// mergeLogs merges two logs requests into one.
// Both requests must be *logsRequest; the second request's resource logs are
// moved into the first one, which is returned as the combined request.
func mergeLogs(_ context.Context, r1 Request, r2 Request) (Request, error) {
	first, okFirst := r1.(*logsRequest)
	second, okSecond := r2.(*logsRequest)
	if !okFirst || !okSecond {
		return nil, errors.New("invalid input type")
	}
	// MoveAndAppendTo empties second.ld and transfers its contents to first.ld.
	second.ld.ResourceLogs().MoveAndAppendTo(first.ld.ResourceLogs())
	return first, nil
}

// mergeSplitLogs splits and/or merges the logs into multiple requests based on the MaxSizeConfig.
// The returned slice contains requests holding at most cfg.MaxSizeItems log records each;
// a trailing partially-filled request is included if any records remain.
func mergeSplitLogs(_ context.Context, cfg exporterbatcher.MaxSizeConfig, r1 Request, r2 Request) ([]Request, error) {
	var (
		res          []Request
		destReq      *logsRequest
		capacityLeft = cfg.MaxSizeItems
	)
	for _, req := range []Request{r1, r2} {
		if req == nil {
			continue
		}
		srcReq, ok := req.(*logsRequest)
		if !ok {
			return nil, errors.New("invalid input type")
		}
		// Fast path: the whole source request fits into the remaining capacity.
		if srcCount := srcReq.ld.LogRecordCount(); srcCount <= capacityLeft {
			if destReq == nil {
				destReq = srcReq
			} else {
				srcReq.ld.ResourceLogs().MoveAndAppendTo(destReq.ld.ResourceLogs())
			}
			// Subtract only the records contributed by this source request.
			// Subtracting destReq's accumulated count here would over-subtract
			// once destReq already holds records from a previous request.
			capacityLeft -= srcCount
			continue
		}

		// Slow path: carve capacity-sized chunks out of the source request.
		for {
			extractedLogs := extractLogs(srcReq.ld, capacityLeft)
			if extractedLogs.LogRecordCount() == 0 {
				break
			}
			capacityLeft -= extractedLogs.LogRecordCount()
			if destReq == nil {
				destReq = &logsRequest{ld: extractedLogs, pusher: srcReq.pusher}
			} else {
				extractedLogs.ResourceLogs().MoveAndAppendTo(destReq.ld.ResourceLogs())
			}
			// Create new batch once capacity is reached.
			if capacityLeft == 0 {
				res = append(res, destReq)
				destReq = nil
				capacityLeft = cfg.MaxSizeItems
			}
		}
	}

	if destReq != nil {
		res = append(res, destReq)
	}
	return res, nil
}

// extractLogs extracts logs from the input logs and returns a new logs with the specified number of log records.
// The extracted records are removed from srcLogs; at most `count` records are moved.
func extractLogs(srcLogs plog.Logs, count int) plog.Logs {
	destLogs := plog.NewLogs()
	srcLogs.ResourceLogs().RemoveIf(func(srcRL plog.ResourceLogs) bool {
		// Capacity exhausted: keep all remaining resource logs in the source.
		if count == 0 {
			return false
		}
		// If this resource holds more records than still needed, split it:
		// srcRL is rebound to the extracted part, while the reduced original
		// stays behind in srcLogs.
		needToExtract := resourceLogsCount(srcRL) > count
		if needToExtract {
			srcRL = extractResourceLogs(srcRL, count)
		}
		count -= resourceLogsCount(srcRL)
		srcRL.MoveTo(destLogs.ResourceLogs().AppendEmpty())
		// Remove the entry from the source only when the whole resource moved.
		return !needToExtract
	})
	return destLogs
}

// extractResourceLogs extracts resource logs and returns a new resource logs with the specified number of log records.
// The resource and schema URL are copied; extracted records are removed from srcRL.
func extractResourceLogs(srcRL plog.ResourceLogs, count int) plog.ResourceLogs {
	destRL := plog.NewResourceLogs()
	destRL.SetSchemaUrl(srcRL.SchemaUrl())
	srcRL.Resource().CopyTo(destRL.Resource())
	srcRL.ScopeLogs().RemoveIf(func(srcSL plog.ScopeLogs) bool {
		// Capacity exhausted: keep all remaining scope logs in the source.
		if count == 0 {
			return false
		}
		// If this scope holds more records than still needed, split it:
		// srcSL is rebound to the extracted part, while the reduced original
		// stays behind in srcRL.
		needToExtract := srcSL.LogRecords().Len() > count
		if needToExtract {
			srcSL = extractScopeLogs(srcSL, count)
		}
		count -= srcSL.LogRecords().Len()
		srcSL.MoveTo(destRL.ScopeLogs().AppendEmpty())
		// Remove the entry from the source only when the whole scope moved.
		return !needToExtract
	})
	return destRL
}

// extractScopeLogs extracts scope logs and returns a new scope logs with the specified number of log records.
// The scope and schema URL are copied; the moved records are removed from srcSL.
func extractScopeLogs(srcSL plog.ScopeLogs, count int) plog.ScopeLogs {
	result := plog.NewScopeLogs()
	result.SetSchemaUrl(srcSL.SchemaUrl())
	srcSL.Scope().CopyTo(result.Scope())
	srcSL.LogRecords().RemoveIf(func(record plog.LogRecord) bool {
		// Stop moving records once the requested count has been reached.
		if count <= 0 {
			return false
		}
		count--
		record.MoveTo(result.LogRecords().AppendEmpty())
		return true
	})
	return result
}

// resourceLogsCount calculates the total number of log records in the plog.ResourceLogs.
func resourceLogsCount(rl plog.ResourceLogs) int {
	total := 0
	scopeLogs := rl.ScopeLogs()
	for i := 0; i < scopeLogs.Len(); i++ {
		total += scopeLogs.At(i).LogRecords().Len()
	}
	return total
}

0 comments on commit 4b5d68e

Please sign in to comment.