Skip to content

Commit

Permalink
TUN-8731: Implement diag/system endpoint
Browse files Browse the repository at this point in the history
## Summary
This PR adds a new endpoint, "diag/system", to the metrics server; it collects system information on the different supported operating systems.

Closes TUN-8731
  • Loading branch information
Luis Neto committed Nov 22, 2024
1 parent e2c2b01 commit aab5364
Show file tree
Hide file tree
Showing 12 changed files with 1,542 additions and 0 deletions.
3 changes: 3 additions & 0 deletions cmd/cloudflared/tunnel/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"github.com/cloudflare/cloudflared/config"
"github.com/cloudflare/cloudflared/connection"
"github.com/cloudflare/cloudflared/credentials"
"github.com/cloudflare/cloudflared/diagnostic"
"github.com/cloudflare/cloudflared/edgediscovery"
"github.com/cloudflare/cloudflared/features"
"github.com/cloudflare/cloudflared/ingress"
Expand Down Expand Up @@ -463,8 +464,10 @@ func StartServer(
readinessServer := metrics.NewReadyServer(clientID,
tunnelstate.NewConnTracker(log))
observer.RegisterSink(readinessServer)
diagnosticHandler := diagnostic.NewDiagnosticHandler(log, 0, diagnostic.NewSystemCollectorImpl(buildInfo.CloudflaredVersion))
metricsConfig := metrics.Config{
ReadyServer: readinessServer,
DiagnosticHandler: diagnosticHandler,
QuickTunnelHostname: quickTunnelURL,
Orchestrator: orchestrator,
}
Expand Down
9 changes: 9 additions & 0 deletions diagnostic/consts.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package diagnostic

import "time"

const (
// defaultCollectorTimeout is the timeout applied to a collector operation
// when NewDiagnosticHandler is given a zero timeout.
defaultCollectorTimeout = time.Second * 10
// collectorField is the log field key under which the collector name is recorded.
collectorField = "collector"
// systemCollectorName is the collector name logged by the system handler.
systemCollectorName = "system"
)
16 changes: 16 additions & 0 deletions diagnostic/error.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package diagnostic

import (
"errors"
)

var (
// Error used when parsing the lines of the output of collector.
ErrInsufficientLines = errors.New("insufficient lines")
// Error used when parsing the fields of the output of collector.
// NOTE(review): the identifier is misspelled ("Insuficient"); it is exported,
// so renaming it requires updating every caller at the same time.
ErrInsuficientFields = errors.New("insufficient fields")
// Error used when given key is not found while parsing KV.
ErrKeyNotFound = errors.New("key not found")
// Error used when there is no disk volume information available.
ErrNoVolumeFound = errors.New("No disk volume information found")
)
83 changes: 83 additions & 0 deletions diagnostic/handlers.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
package diagnostic

import (
"context"
"encoding/json"
"net/http"
"time"

"github.com/rs/zerolog"
)

// Handler holds the dependencies shared by the diagnostic HTTP handlers
// (in this file: SystemHandler).
type Handler struct {
// log is the base logger; SystemHandler derives a collector-scoped logger from it.
log *zerolog.Logger
// timeout bounds each collection; it is applied on top of the request context.
timeout time.Duration
// systemCollector provides the system information served by SystemHandler.
systemCollector SystemCollector
}

// NewDiagnosticHandler builds a Handler from its dependencies.
//
// A zero timeout selects defaultCollectorTimeout; any other value is used
// as given.
func NewDiagnosticHandler(
	log *zerolog.Logger,
	timeout time.Duration,
	systemCollector SystemCollector,
) *Handler {
	// Start from the default and override only when the caller chose a value.
	handler := &Handler{
		log:             log,
		timeout:         defaultCollectorTimeout,
		systemCollector: systemCollector,
	}

	if timeout != 0 {
		handler.timeout = timeout
	}

	return handler
}

// SystemHandler is the HTTP handler that collects system information and
// writes it to the response.
//
// Possible outcomes:
//   - the collector succeeds: the SystemInformation is JSON-encoded into the body;
//   - the collector fails but returns raw output: the raw output is written as-is;
//   - the collector fails without raw output, or succeeds with a nil result:
//     the response is a 500 with an empty body.
//
// The collection runs under the request context further limited by
// handler.timeout.
func (handler *Handler) SystemHandler(writer http.ResponseWriter, request *http.Request) {
	logger := handler.log.With().Str(collectorField, systemCollectorName).Logger()
	logger.Info().Msg("Collection started")

	defer func() {
		logger.Info().Msg("Collection finished")
	}()

	ctx, cancel := context.WithTimeout(request.Context(), handler.timeout)
	defer cancel()

	info, rawInfo, err := handler.systemCollector.Collect(ctx)
	if err != nil {
		logger.Error().Err(err).Msg("error occurred whilst collecting system information")

		if rawInfo == "" {
			logger.Error().Msg("no raw information available")
			writer.WriteHeader(http.StatusInternalServerError)

			return
		}

		// Structured parsing failed, but the collector's raw output is still
		// useful to whoever requested the diagnostic, so return it verbatim.
		logger.Info().Msg("using raw information fallback")
		writeResponse(writer, []byte(rawInfo), &logger)

		return
	}

	if info == nil {
		logger.Error().Msg("system information collection is nil")
		writer.WriteHeader(http.StatusInternalServerError)

		// Stop here: falling through would JSON-encode the nil pointer and
		// append a "null" body to the error response.
		return
	}

	err = json.NewEncoder(writer).Encode(info)
	if err != nil {
		logger.Error().Err(err).Msg("error occurred whilst serializing information")
		// Only takes effect if nothing has been written to the response yet.
		writer.WriteHeader(http.StatusInternalServerError)
	}
}

// writeResponse writes bytes to writer and logs, through logger, either the
// write error or a short write. Failures are only logged, never returned.
func writeResponse(writer http.ResponseWriter, bytes []byte, logger *zerolog.Logger) {
	written, writeErr := writer.Write(bytes)

	switch {
	case writeErr != nil:
		logger.Error().Err(writeErr).Msg("error occurred writing response")
	case written != len(bytes):
		logger.Error().Msgf("error incomplete write response %d/%d", written, len(bytes))
	}
}
108 changes: 108 additions & 0 deletions diagnostic/handlers_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
package diagnostic_test

import (
"context"
"encoding/json"
"errors"
"io"
"net/http"
"net/http/httptest"
"testing"

"github.com/rs/zerolog"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/cloudflare/cloudflared/diagnostic"
)

// SystemCollectorMock is a test double for the system collector: its Collect
// method returns whatever values were stored in the context it receives.
type SystemCollectorMock struct{}

// systemCollectorCtxKey is a dedicated type for the context keys used by
// SystemCollectorMock. Using a named type instead of a plain string means
// these keys cannot collide with string keys set by other code on the same
// context.
type systemCollectorCtxKey string

// Keys under which the values SystemCollectorMock.Collect should return are
// stored in the request context.
const (
	systemInformationKey systemCollectorCtxKey = "sikey"
	rawInformationKey    systemCollectorCtxKey = "rikey"
	errorKey             systemCollectorCtxKey = "errkey"
)

// setCtxValuesForSystemCollector returns a background-derived context that
// carries the three values SystemCollectorMock.Collect will hand back.
func setCtxValuesForSystemCollector(
	systemInfo *diagnostic.SystemInformation,
	rawInfo string,
	err error,
) context.Context {
	withInfo := context.WithValue(context.Background(), systemInformationKey, systemInfo)
	withRaw := context.WithValue(withInfo, rawInformationKey, rawInfo)

	return context.WithValue(withRaw, errorKey, err)
}

// Collect returns the system information, raw output and error stored in ctx
// under systemInformationKey, rawInformationKey and errorKey. A value that is
// missing or has an unexpected type yields the zero value for that result.
func (*SystemCollectorMock) Collect(ctx context.Context) (*diagnostic.SystemInformation, string, error) {
	var (
		info    *diagnostic.SystemInformation
		raw     string
		failure error
	)

	if stored, ok := ctx.Value(systemInformationKey).(*diagnostic.SystemInformation); ok {
		info = stored
	}

	if stored, ok := ctx.Value(rawInformationKey).(string); ok {
		raw = stored
	}

	if stored, ok := ctx.Value(errorKey).(error); ok {
		failure = stored
	}

	return info, raw, failure
}

// TestSystemHandler drives SystemHandler with a mock collector and checks the
// status code and body for each collector outcome: success, error with raw
// fallback, error without fallback, and a nil result without error.
func TestSystemHandler(t *testing.T) {
	t.Parallel()

	log := zerolog.Nop()
	tests := []struct {
		name       string
		systemInfo *diagnostic.SystemInformation
		rawInfo    string
		err        error
		statusCode int
	}{
		{
			name: "happy path",
			systemInfo: diagnostic.NewSystemInformation(
				0, 0, 0, 0,
				"string", "string", "string", "string",
				"string", "string", nil,
			),
			rawInfo:    "",
			err:        nil,
			statusCode: http.StatusOK,
		},
		{
			name: "on error and raw info", systemInfo: nil,
			rawInfo: "raw info", err: errors.New("an error"), statusCode: http.StatusOK,
		},
		{
			name: "on error and no raw info", systemInfo: nil,
			rawInfo: "", err: errors.New("an error"), statusCode: http.StatusInternalServerError,
		},
		{
			name: "malformed response", systemInfo: nil, rawInfo: "", err: nil, statusCode: http.StatusInternalServerError,
		},
	}

	for _, tCase := range tests {
		// Parallel subtests run after the loop has advanced; pin the case so
		// each one sees its own data on toolchains with a shared loop variable.
		tCase := tCase

		t.Run(tCase.name, func(t *testing.T) {
			t.Parallel()

			handler := diagnostic.NewDiagnosticHandler(&log, 0, &SystemCollectorMock{})
			recorder := httptest.NewRecorder()
			ctx := setCtxValuesForSystemCollector(tCase.systemInfo, tCase.rawInfo, tCase.err)
			request, err := http.NewRequestWithContext(ctx, http.MethodGet, "/diag/system", nil)
			require.NoError(t, err)
			handler.SystemHandler(recorder, request)

			assert.Equal(t, tCase.statusCode, recorder.Code)

			switch {
			case tCase.statusCode == http.StatusOK && tCase.systemInfo != nil:
				// Success: the body must decode back into the same information.
				var response diagnostic.SystemInformation

				decoder := json.NewDecoder(recorder.Body)
				err = decoder.Decode(&response)
				require.NoError(t, err)
				assert.Equal(t, tCase.systemInfo, &response)
			case tCase.statusCode == http.StatusOK && tCase.rawInfo != "":
				// Fallback: the raw collector output is returned verbatim.
				rawBytes, err := io.ReadAll(recorder.Body)
				require.NoError(t, err)
				assert.Equal(t, tCase.rawInfo, string(rawBytes))
			}
		})
	}
}
70 changes: 70 additions & 0 deletions diagnostic/system_collector.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package diagnostic

import "context"

// DiskVolumeInformation describes a single disk volume in the diagnostic
// system report.
type DiskVolumeInformation struct {
	// Name represents the filesystem in linux/macos or device name in windows.
	Name string `json:"name"`
	// SizeMaximum represents the maximum size of the disk in kilobytes.
	SizeMaximum uint64 `json:"sizeMaximum"`
	// SizeCurrent represents the current size of the disk in kilobytes.
	SizeCurrent uint64 `json:"sizeCurrent"`
}

// NewDiskVolumeInformation returns a DiskVolumeInformation for the volume
// called name, with maximum and current as its sizes in kilobytes.
func NewDiskVolumeInformation(name string, maximum, current uint64) *DiskVolumeInformation {
	return &DiskVolumeInformation{
		Name:        name,
		SizeMaximum: maximum,
		SizeCurrent: current,
	}
}

// SystemInformation is the structured system report produced by a
// SystemCollector and JSON-encoded by the diagnostic system handler.
type SystemInformation struct {
MemoryMaximum uint64 `json:"memoryMaximum"` // represents the maximum memory of the system in kilobytes
MemoryCurrent uint64 `json:"memoryCurrent"` // represents the system's memory in use in kilobytes
FileDescriptorMaximum uint64 `json:"fileDescriptorMaximum"` // represents the maximum number of file descriptors of the system
FileDescriptorCurrent uint64 `json:"fileDescriptorCurrent"` // represents the system's file descriptors in use
OsSystem string `json:"osSystem"` // represents the operating system name i.e.: linux, windows, darwin
HostName string `json:"hostName"` // represents the system host name
OsVersion string `json:"osVersion"` // detailed information about the system's release version level
OsRelease string `json:"osRelease"` // detailed information about the system's release
Architecture string `json:"architecture"` // represents the system's hardware platform i.e: arm64/amd64
CloudflaredVersion string `json:"cloudflaredVersion"` // the runtime version of cloudflared
// Disk lists the disk volume entries of the system; it encodes as JSON null when nil.
Disk []*DiskVolumeInformation `json:"disk"`
}

// NewSystemInformation assembles a SystemInformation from its individual data
// points. Arguments map one-to-one onto the struct fields in declaration
// order: memory (maximum, current), file descriptors (maximum, current),
// operating system name, host name, OS version, OS release, architecture,
// cloudflared version and the disk volume list.
func NewSystemInformation(
	memoryMaximum,
	memoryCurrent,
	filesMaximum,
	filesCurrent uint64,
	osystem,
	name,
	osVersion,
	osRelease,
	architecture,
	cloudflaredVersion string,
	disk []*DiskVolumeInformation,
) *SystemInformation {
	return &SystemInformation{
		MemoryMaximum:         memoryMaximum,
		MemoryCurrent:         memoryCurrent,
		FileDescriptorMaximum: filesMaximum,
		FileDescriptorCurrent: filesCurrent,
		OsSystem:              osystem,
		HostName:              name,
		OsVersion:             osVersion,
		OsRelease:             osRelease,
		Architecture:          architecture,
		CloudflaredVersion:    cloudflaredVersion,
		Disk:                  disk,
	}
}

// SystemCollector is the abstraction through which system information is
// gathered for the diagnostic system report.
type SystemCollector interface {
// Collect gathers the system information.
//
// If the collection is successful it will return a `SystemInformation` struct,
// an empty string, and a nil error.
// In case there is an error, a string with the raw data will be returned;
// however, the returned string may not contain all the data points.
//
// This function expects that the caller sets the context timeout to prevent
// long-lived collectors.
Collect(ctx context.Context) (*SystemInformation, string, error)
}
Loading

0 comments on commit aab5364

Please sign in to comment.