Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Respect telemetry configuration when running as a Windows service #9726

Merged
25 changes: 25 additions & 0 deletions .chloggen/windows-service-respects-telemetry-config.yaml
@@ -0,0 +1,25 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: 'bug_fix'

# The name of the component, or a single word describing the area of concern, (e.g. otlpreceiver)
component: otelcol

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Respect telemetry configuration when running as a Windows service

# One or more tracking issues or pull requests related to the change
issues: [5300]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [user]
7 changes: 4 additions & 3 deletions .github/workflows/build-and-test-windows.yaml
Expand Up @@ -61,15 +61,16 @@ jobs:

- name: Install otelcorecol as a service
run: |
New-Service -Name "otelcorecol" -BinaryPathName "${PWD}\bin\otelcorecol_windows_amd64 --config ${PWD}\examples\local\otel-config.yaml"
New-Service -Name "otelcorecol" -StartupType "Manual" -BinaryPathName "${PWD}\bin\otelcorecol_windows_amd64 --config ${PWD}\examples\local\otel-config.yaml"
eventcreate.exe /t information /id 1 /l application /d "Creating event provider for 'otelcorecol'" /so otelcorecol

- name: Test otelcorecol service
working-directory: ${{ github.workspace }}/otelcol
run: |
(Start-Service otelcorecol -PassThru).WaitForStatus('Running', '00:00:30')
(Stop-Service otelcorecol -PassThru).WaitForStatus('Stopped', '00:00:30')
go test -timeout 90s -run ^TestCollectorAsService$ -v -tags=win32service

- name: Remove otelcorecol service
if: always()
run: |
Remove-Service otelcorecol
Remove-Item HKLM:\SYSTEM\CurrentControlSet\Services\EventLog\Application\otelcorecol
6 changes: 4 additions & 2 deletions otelcol/collector.go
Expand Up @@ -99,8 +99,9 @@ type Collector struct {

configProvider ConfigProvider

service *service.Service
state *atomic.Int32
serviceConfig *service.Config
service *service.Service
state *atomic.Int32

// shutdownChan is used to terminate the collector.
shutdownChan chan struct{}
Expand Down Expand Up @@ -182,6 +183,7 @@ func (col *Collector) setupConfigurationComponents(ctx context.Context) error {
return fmt.Errorf("invalid configuration: %w", err)
}

col.serviceConfig = &cfg.Service
col.service, err = service.New(ctx, service.Settings{
BuildInfo: col.set.BuildInfo,
CollectorConf: conf,
Expand Down
30 changes: 24 additions & 6 deletions otelcol/collector_windows.go
Expand Up @@ -18,6 +18,7 @@ import (
"golang.org/x/sys/windows/svc/eventlog"

"go.opentelemetry.io/collector/featuregate"
"go.opentelemetry.io/collector/service"
)

type windowsService struct {
Expand Down Expand Up @@ -76,11 +77,6 @@ func (s *windowsService) Execute(args []string, requests <-chan svc.ChangeReques
}

func (s *windowsService) start(elog *eventlog.Log, colErrorChannel chan error) error {
// Append to new slice instead of the already existing s.settings.LoggingOptions slice to not change that.
s.settings.LoggingOptions = append(
[]zap.Option{zap.WrapCore(withWindowsCore(elog))},
s.settings.LoggingOptions...,
)
// Parse all the flags manually.
if err := s.flags.Parse(os.Args[1:]); err != nil {
return err
Expand All @@ -96,6 +92,18 @@ func (s *windowsService) start(elog *eventlog.Log, colErrorChannel chan error) e
return err
}

// The logging options need to be in place before the collector Run method is called
// since the telemetry creates the logger at the time of the Run method call.
// However, the zap.WrapCore function needs to read the serviceConfig to determine
// if the Windows Event Log should be used, however, the serviceConfig is also
// only read at the time of the Run method call. To work around this, we pass the
// serviceConfig as a pointer to the logging options, and then read its value
// when the zap.Logger is created by the telemetry.
s.col.set.LoggingOptions = append(
s.col.set.LoggingOptions,
zap.WrapCore(withWindowsCore(elog, &s.col.serviceConfig)),
)

// col.Run blocks until receiving a SIGTERM signal, so needs to be started
// asynchronously, but it will exit early if an error occurs on startup
go func() {
Expand Down Expand Up @@ -192,8 +200,18 @@ func (w windowsEventLogCore) Sync() error {
return w.core.Sync()
}

func withWindowsCore(elog *eventlog.Log) func(zapcore.Core) zapcore.Core {
func withWindowsCore(elog *eventlog.Log, serviceConfig **service.Config) func(zapcore.Core) zapcore.Core {
return func(core zapcore.Core) zapcore.Core {
if serviceConfig != nil {
for _, output := range (*serviceConfig).Telemetry.Logs.OutputPaths {
if output != "stdout" && output != "stderr" {
// A log file was specified in the configuration, so we should not use the Windows Event Log
return core
}
}
}

// Use the Windows Event Log
encoderConfig := zap.NewProductionEncoderConfig()
encoderConfig.LineEnding = "\r\n"
return windowsEventLogCore{core, elog, zapcore.NewConsoleEncoder(encoderConfig)}
Expand Down
185 changes: 185 additions & 0 deletions otelcol/collector_windows_service_test.go
@@ -0,0 +1,185 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

//go:build windows && win32service

package otelcol

import (
"encoding/xml"
"fmt"
"os"
"os/exec"
"path/filepath"
"testing"
"time"

"github.com/stretchr/testify/require"
"golang.org/x/sys/windows/svc"
"golang.org/x/sys/windows/svc/mgr"
)

const (
collectorServiceName = "otelcorecol"
)

// Test the collector as a Windows service.
// The test assumes that the service and respective event source are already created.
// The test also must be executed with administrative privileges.
func TestCollectorAsService(t *testing.T) {
collector_executable, err := filepath.Abs(filepath.Join("..", "bin", "otelcorecol_windows_amd64"))
require.NoError(t, err)
_, err = os.Stat(collector_executable)
require.NoError(t, err)

scm, err := mgr.Connect()
require.NoError(t, err)
defer scm.Disconnect()

service, err := scm.OpenService(collectorServiceName)
require.NoError(t, err)
defer service.Close()

tests := []struct {
name string
configFile string
expectStartFailure bool
customSetup func(*testing.T)
customValidation func(*testing.T)
}{
{
name: "Default",
configFile: filepath.Join("..", "examples", "local", "otel-config.yaml"),
},
{
name: "ConfigFileNotFound",
configFile: filepath.Join(".", "non", "existent", "otel-config.yaml"),
expectStartFailure: true,
},
{
name: "LogToFile",
configFile: filepath.Join(".", "testdata", "otel-log-to-file.yaml"),
customSetup: func(t *testing.T) {
// Create the folder and clean the log file if it exists
programDataPath := os.Getenv("ProgramData")
logsPath := filepath.Join(programDataPath, "OpenTelemetry", "Collector", "Logs")
err := os.MkdirAll(logsPath, os.ModePerm)
require.NoError(t, err)

logFilePath := filepath.Join(logsPath, "otelcol.log")
err = os.Remove(logFilePath)
if err != nil && !os.IsNotExist(err) {
require.NoError(t, err)
}
},
customValidation: func(t *testing.T) {
// Check that the log file was created
programDataPath := os.Getenv("ProgramData")
logsPath := filepath.Join(programDataPath, "OpenTelemetry", "Collector", "Logs")
logFilePath := filepath.Join(logsPath, "otelcol.log")
fileinfo, err := os.Stat(logFilePath)
require.NoError(t, err)
require.NotEmpty(t, fileinfo.Size())
},
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
serviceConfig, err := service.Config()
require.NoError(t, err)

// Setup the command line to launch the collector as a service
fullConfigPath, err := filepath.Abs(tt.configFile)
require.NoError(t, err)

serviceConfig.BinaryPathName = fmt.Sprintf("\"%s\" --config \"%s\"", collector_executable, fullConfigPath)
err = service.UpdateConfig(serviceConfig)
require.NoError(t, err)

if tt.customSetup != nil {
tt.customSetup(t)
}

startTime := time.Now()

err = service.Start()
require.NoError(t, err)

expectedState := svc.Running
if tt.expectStartFailure {
expectedState = svc.Stopped
} else {
defer func() {
_, err = service.Control(svc.Stop)
require.NoError(t, err)

require.Eventually(t, func() bool {
status, _ := service.Query()
return status.State == svc.Stopped
}, 10*time.Second, 500*time.Millisecond)
}()
}

// Wait for the service to reach the expected state
require.Eventually(t, func() bool {
status, _ := service.Query()
return status.State == expectedState
}, 10*time.Second, 500*time.Millisecond)

if tt.customValidation != nil {
tt.customValidation(t)
} else {
// Read the events from the otelcorecol source and check that they were emitted after the service
// command started. This is a simple validation that the messages are being logged on the
// Windows event log.
cmd := exec.Command("wevtutil.exe", "qe", "Application", "/c:1", "/rd:true", "/f:RenderedXml", "/q:*[System[Provider[@Name='otelcorecol']]]")
out, err := cmd.CombinedOutput()
require.NoError(t, err)

var e Event
require.NoError(t, xml.Unmarshal([]byte(out), &e))

eventTime, err := time.Parse("2006-01-02T15:04:05.9999999Z07:00", e.System.TimeCreated.SystemTime)
require.NoError(t, err)

require.True(t, eventTime.After(startTime.In(time.UTC)))
}
})
}
}

// Helper types to read the XML events from the event log using wevtutil
type Event struct {
XMLName xml.Name `xml:"Event"`
System System `xml:"System"`
Data string `xml:"EventData>Data"`
}

type System struct {
Provider Provider `xml:"Provider"`
EventID int `xml:"EventID"`
Version int `xml:"Version"`
Level int `xml:"Level"`
Task int `xml:"Task"`
Opcode int `xml:"Opcode"`
Keywords string `xml:"Keywords"`
TimeCreated TimeCreated `xml:"TimeCreated"`
EventRecordID int `xml:"EventRecordID"`
Execution Execution `xml:"Execution"`
Channel string `xml:"Channel"`
Computer string `xml:"Computer"`
}

type Provider struct {
Name string `xml:"Name,attr"`
}

type TimeCreated struct {
SystemTime string `xml:"SystemTime,attr"`
}

type Execution struct {
ProcessID string `xml:"ProcessID,attr"`
ThreadID string `xml:"ThreadID,attr"`
}
30 changes: 30 additions & 0 deletions otelcol/testdata/otel-log-to-file.yaml
@@ -0,0 +1,30 @@
extensions:
zpages:

receivers:
otlp:
protocols:
grpc:
http:

exporters:
debug:
verbosity: detailed

service:
telemetry:
logs:
level: warn
output_paths:
# The folder need to be created prior to starting the collector
- ${ProgramData}\OpenTelemetry\Collector\Logs\otelcol.log

pipelines:
traces:
receivers: [otlp]
exporters: [debug]
metrics:
receivers: [otlp]
exporters: [debug]

extensions: [zpages]