Skip to content

Commit

Permalink
Respect telemetry configuration when running as a Windows service (#9726
Browse files Browse the repository at this point in the history
)

**Description:**
Fixes #5300 

With this change the service telemetry section is respected by the
collector when running as a Windows service. Log lever can be used to
control the verbosity of the events logged and the logger can be
redirected to a file by specifying an output path on the service
telemetry config. By default `stdout` and `stderr` are redirected to the
event log when running as a Windows service to keep the current
behavior.

The code change itself was made with a focus of not breaking the public
APIs and not reading the config more than once. That said it is probably
something to be refactored when the public APIs can be touched again.

**Link to tracking Issue:**
#5300

**Testing:**
The test is an integration test that depends on the actual executable.
It checks for event publication and file output.
  • Loading branch information
pjanotti committed Mar 15, 2024
1 parent 06f177a commit 8990be3
Show file tree
Hide file tree
Showing 6 changed files with 272 additions and 11 deletions.
25 changes: 25 additions & 0 deletions .chloggen/windows-service-respects-telemetry-config.yaml
@@ -0,0 +1,25 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: 'bug_fix'

# The name of the component, or a single word describing the area of concern, (e.g. otlpreceiver)
component: otelcol

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Respect telemetry configuration when running as a Windows service

# One or more tracking issues or pull requests related to the change
issues: [5300]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [user]
7 changes: 4 additions & 3 deletions .github/workflows/build-and-test-windows.yaml
Expand Up @@ -61,15 +61,16 @@ jobs:

- name: Install otelcorecol as a service
run: |
New-Service -Name "otelcorecol" -BinaryPathName "${PWD}\bin\otelcorecol_windows_amd64 --config ${PWD}\examples\local\otel-config.yaml"
New-Service -Name "otelcorecol" -StartupType "Manual" -BinaryPathName "${PWD}\bin\otelcorecol_windows_amd64 --config ${PWD}\examples\local\otel-config.yaml"
eventcreate.exe /t information /id 1 /l application /d "Creating event provider for 'otelcorecol'" /so otelcorecol
- name: Test otelcorecol service
working-directory: ${{ github.workspace }}/otelcol
run: |
(Start-Service otelcorecol -PassThru).WaitForStatus('Running', '00:00:30')
(Stop-Service otelcorecol -PassThru).WaitForStatus('Stopped', '00:00:30')
go test -timeout 90s -run ^TestCollectorAsService$ -v -tags=win32service
- name: Remove otelcorecol service
if: always()
run: |
Remove-Service otelcorecol
Remove-Item HKLM:\SYSTEM\CurrentControlSet\Services\EventLog\Application\otelcorecol
6 changes: 4 additions & 2 deletions otelcol/collector.go
Expand Up @@ -99,8 +99,9 @@ type Collector struct {

configProvider ConfigProvider

service *service.Service
state *atomic.Int32
serviceConfig *service.Config
service *service.Service
state *atomic.Int32

// shutdownChan is used to terminate the collector.
shutdownChan chan struct{}
Expand Down Expand Up @@ -182,6 +183,7 @@ func (col *Collector) setupConfigurationComponents(ctx context.Context) error {
return fmt.Errorf("invalid configuration: %w", err)
}

col.serviceConfig = &cfg.Service
col.service, err = service.New(ctx, service.Settings{
BuildInfo: col.set.BuildInfo,
CollectorConf: conf,
Expand Down
30 changes: 24 additions & 6 deletions otelcol/collector_windows.go
Expand Up @@ -18,6 +18,7 @@ import (
"golang.org/x/sys/windows/svc/eventlog"

"go.opentelemetry.io/collector/featuregate"
"go.opentelemetry.io/collector/service"
)

type windowsService struct {
Expand Down Expand Up @@ -76,11 +77,6 @@ func (s *windowsService) Execute(args []string, requests <-chan svc.ChangeReques
}

func (s *windowsService) start(elog *eventlog.Log, colErrorChannel chan error) error {
// Append to new slice instead of the already existing s.settings.LoggingOptions slice to not change that.
s.settings.LoggingOptions = append(
[]zap.Option{zap.WrapCore(withWindowsCore(elog))},
s.settings.LoggingOptions...,
)
// Parse all the flags manually.
if err := s.flags.Parse(os.Args[1:]); err != nil {
return err
Expand All @@ -96,6 +92,18 @@ func (s *windowsService) start(elog *eventlog.Log, colErrorChannel chan error) e
return err
}

// The logging options need to be in place before the collector Run method is called
// since the telemetry creates the logger at the time of the Run method call.
// However, the zap.WrapCore function needs to read the serviceConfig to determine
// if the Windows Event Log should be used, however, the serviceConfig is also
// only read at the time of the Run method call. To work around this, we pass the
// serviceConfig as a pointer to the logging options, and then read its value
// when the zap.Logger is created by the telemetry.
s.col.set.LoggingOptions = append(
s.col.set.LoggingOptions,
zap.WrapCore(withWindowsCore(elog, &s.col.serviceConfig)),
)

// col.Run blocks until receiving a SIGTERM signal, so needs to be started
// asynchronously, but it will exit early if an error occurs on startup
go func() {
Expand Down Expand Up @@ -192,8 +200,18 @@ func (w windowsEventLogCore) Sync() error {
return w.core.Sync()
}

func withWindowsCore(elog *eventlog.Log) func(zapcore.Core) zapcore.Core {
func withWindowsCore(elog *eventlog.Log, serviceConfig **service.Config) func(zapcore.Core) zapcore.Core {
return func(core zapcore.Core) zapcore.Core {
if serviceConfig != nil {
for _, output := range (*serviceConfig).Telemetry.Logs.OutputPaths {
if output != "stdout" && output != "stderr" {
// A log file was specified in the configuration, so we should not use the Windows Event Log
return core
}
}
}

// Use the Windows Event Log
encoderConfig := zap.NewProductionEncoderConfig()
encoderConfig.LineEnding = "\r\n"
return windowsEventLogCore{core, elog, zapcore.NewConsoleEncoder(encoderConfig)}
Expand Down
185 changes: 185 additions & 0 deletions otelcol/collector_windows_service_test.go
@@ -0,0 +1,185 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

//go:build windows && win32service

package otelcol

import (
"encoding/xml"
"fmt"
"os"
"os/exec"
"path/filepath"
"testing"
"time"

"github.com/stretchr/testify/require"
"golang.org/x/sys/windows/svc"
"golang.org/x/sys/windows/svc/mgr"
)

const (
collectorServiceName = "otelcorecol"
)

// Test the collector as a Windows service.
// The test assumes that the service and respective event source are already created.
// The test also must be executed with administrative privileges.
func TestCollectorAsService(t *testing.T) {
collector_executable, err := filepath.Abs(filepath.Join("..", "bin", "otelcorecol_windows_amd64"))
require.NoError(t, err)
_, err = os.Stat(collector_executable)
require.NoError(t, err)

scm, err := mgr.Connect()
require.NoError(t, err)
defer scm.Disconnect()

service, err := scm.OpenService(collectorServiceName)
require.NoError(t, err)
defer service.Close()

tests := []struct {
name string
configFile string
expectStartFailure bool
customSetup func(*testing.T)
customValidation func(*testing.T)
}{
{
name: "Default",
configFile: filepath.Join("..", "examples", "local", "otel-config.yaml"),
},
{
name: "ConfigFileNotFound",
configFile: filepath.Join(".", "non", "existent", "otel-config.yaml"),
expectStartFailure: true,
},
{
name: "LogToFile",
configFile: filepath.Join(".", "testdata", "otel-log-to-file.yaml"),
customSetup: func(t *testing.T) {
// Create the folder and clean the log file if it exists
programDataPath := os.Getenv("ProgramData")
logsPath := filepath.Join(programDataPath, "OpenTelemetry", "Collector", "Logs")
err := os.MkdirAll(logsPath, os.ModePerm)
require.NoError(t, err)

logFilePath := filepath.Join(logsPath, "otelcol.log")
err = os.Remove(logFilePath)
if err != nil && !os.IsNotExist(err) {
require.NoError(t, err)
}
},
customValidation: func(t *testing.T) {
// Check that the log file was created
programDataPath := os.Getenv("ProgramData")
logsPath := filepath.Join(programDataPath, "OpenTelemetry", "Collector", "Logs")
logFilePath := filepath.Join(logsPath, "otelcol.log")
fileinfo, err := os.Stat(logFilePath)
require.NoError(t, err)
require.NotEmpty(t, fileinfo.Size())
},
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
serviceConfig, err := service.Config()
require.NoError(t, err)

// Setup the command line to launch the collector as a service
fullConfigPath, err := filepath.Abs(tt.configFile)
require.NoError(t, err)

serviceConfig.BinaryPathName = fmt.Sprintf("\"%s\" --config \"%s\"", collector_executable, fullConfigPath)
err = service.UpdateConfig(serviceConfig)
require.NoError(t, err)

if tt.customSetup != nil {
tt.customSetup(t)
}

startTime := time.Now()

err = service.Start()
require.NoError(t, err)

expectedState := svc.Running
if tt.expectStartFailure {
expectedState = svc.Stopped
} else {
defer func() {
_, err = service.Control(svc.Stop)
require.NoError(t, err)

require.Eventually(t, func() bool {
status, _ := service.Query()
return status.State == svc.Stopped
}, 10*time.Second, 500*time.Millisecond)
}()
}

// Wait for the service to reach the expected state
require.Eventually(t, func() bool {
status, _ := service.Query()
return status.State == expectedState
}, 10*time.Second, 500*time.Millisecond)

if tt.customValidation != nil {
tt.customValidation(t)
} else {
// Read the events from the otelcorecol source and check that they were emitted after the service
// command started. This is a simple validation that the messages are being logged on the
// Windows event log.
cmd := exec.Command("wevtutil.exe", "qe", "Application", "/c:1", "/rd:true", "/f:RenderedXml", "/q:*[System[Provider[@Name='otelcorecol']]]")
out, err := cmd.CombinedOutput()
require.NoError(t, err)

var e Event
require.NoError(t, xml.Unmarshal([]byte(out), &e))

eventTime, err := time.Parse("2006-01-02T15:04:05.9999999Z07:00", e.System.TimeCreated.SystemTime)
require.NoError(t, err)

require.True(t, eventTime.After(startTime.In(time.UTC)))
}
})
}
}

// Helper types to read the XML events from the event log using wevtutil
type Event struct {
XMLName xml.Name `xml:"Event"`
System System `xml:"System"`
Data string `xml:"EventData>Data"`
}

type System struct {
Provider Provider `xml:"Provider"`
EventID int `xml:"EventID"`
Version int `xml:"Version"`
Level int `xml:"Level"`
Task int `xml:"Task"`
Opcode int `xml:"Opcode"`
Keywords string `xml:"Keywords"`
TimeCreated TimeCreated `xml:"TimeCreated"`
EventRecordID int `xml:"EventRecordID"`
Execution Execution `xml:"Execution"`
Channel string `xml:"Channel"`
Computer string `xml:"Computer"`
}

type Provider struct {
Name string `xml:"Name,attr"`
}

type TimeCreated struct {
SystemTime string `xml:"SystemTime,attr"`
}

type Execution struct {
ProcessID string `xml:"ProcessID,attr"`
ThreadID string `xml:"ThreadID,attr"`
}
30 changes: 30 additions & 0 deletions otelcol/testdata/otel-log-to-file.yaml
@@ -0,0 +1,30 @@
extensions:
zpages:

receivers:
otlp:
protocols:
grpc:
http:

exporters:
debug:
verbosity: detailed

service:
telemetry:
logs:
level: warn
output_paths:
# The folder need to be created prior to starting the collector
- ${ProgramData}\OpenTelemetry\Collector\Logs\otelcol.log

pipelines:
traces:
receivers: [otlp]
exporters: [debug]
metrics:
receivers: [otlp]
exporters: [debug]

extensions: [zpages]

0 comments on commit 8990be3

Please sign in to comment.