Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add probe help #324

Merged
merged 1 commit into from
Dec 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions pkg/exporter/probe/legacy.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ func BuildStandardMetricsLabelValues(entity *nettop.Entity) []string {
return append(metaPodLabels, BuildAdditionalLabelsValues(entity.GetLabels())...)
}

// LegacyMetric describes a single metric exposed by a legacy batch collector.
type LegacyMetric struct {
// Name is the short metric name; it is combined with the module name to
// build the exported metric name.
Name string
// Help is the help text attached to the metric's Prometheus descriptor.
Help string
}

func InitAdditionalLabels(additionalLabels []string) error {
if len(additionalLabels) == 0 {
return nil
Expand Down Expand Up @@ -84,15 +89,15 @@ func newMetricsName(module, name string) string {

type LegacyCollector func() (map[string]map[uint32]uint64, error)

func NewLegacyBatchMetrics(module string, metrics []string, collector LegacyCollector) prometheus.Collector {
func NewLegacyBatchMetrics(module string, metrics []LegacyMetric, collector LegacyCollector) prometheus.Collector {
return newLegacyBatchMetrics(module, metrics, collector)
}

func newLegacyBatchMetrics(module string, metrics []string, collector LegacyCollector) prometheus.Collector {
func newLegacyBatchMetrics(module string, metrics []LegacyMetric, collector LegacyCollector) prometheus.Collector {
descs := make(map[string]*prometheus.Desc)
for _, m := range metrics {
newName := newMetricsName(module, m)
descs[newName] = prometheus.NewDesc(newName, "", StandardMetricsLabels, nil)
newName := newMetricsName(module, m.Name)
descs[newName] = prometheus.NewDesc(newName, m.Help, StandardMetricsLabels, nil)
}
return &legacyBatchMetrics{
module: module,
Expand Down
16 changes: 14 additions & 2 deletions pkg/exporter/probe/nlconntrack/conntrackmetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,19 @@ var (
MaxEntries = "maxentries"

// stats of conntrack status summary
conntrackMetrics = []string{Found, Invalid, Ignore, Insert, InsertFailed, Drop, EarlyDrop, Error, SearchRestart, Entries, MaxEntries}
// conntrackMetrics lists the conntrack statistics exported by this probe
// together with their help text. The wording for the packet-level counters
// (invalid, ignore, drop) and for early_drop follows the counter descriptions
// in conntrack(8): those count packets or evicted entries, not connections.
conntrackMetrics = []probe.LegacyMetric{
	{Name: Found, Help: "The total number of tracked connections found in the conntrack table."},
	{Name: Invalid, Help: "The total number of packets seen that could not be tracked."},
	{Name: Ignore, Help: "The total number of packets seen that were already connected to a conntrack entry."},
	{Name: Insert, Help: "The total number of connections inserted into the conntrack table."},
	{Name: InsertFailed, Help: "The total number of failed attempts to insert a connection into the conntrack table."},
	{Name: Drop, Help: "The total number of packets dropped due to conntrack failure (entry allocation failed or a protocol helper dropped the packet)."},
	{Name: EarlyDrop, Help: "The total number of conntrack entries evicted to make room for new ones because the table had reached its maximum size."},
	{Name: Error, Help: "The total number of errors encountered while managing connections in the conntrack table."},
	{Name: SearchRestart, Help: "The total number of times the search for a connection entry was restarted."},
	{Name: Entries, Help: "The current number of connections tracked in the conntrack table."},
	{Name: MaxEntries, Help: "The maximum number of entries allowed in the conntrack table."},
}
)

func metricsProbeCreator() (probe.MetricsProbe, error) {
Expand Down Expand Up @@ -80,7 +92,7 @@ func (c *conntrackMetricsProbe) CollectOnce() (map[string]map[uint32]uint64, err
}

for _, metric := range conntrackMetrics {
resMap[metric] = map[uint32]uint64{uint32(nettop.InitNetns): stats[metric]}
resMap[metric.Name] = map[uint32]uint64{uint32(nettop.InitNetns): stats[metric.Name]}
}

return resMap, nil
Expand Down
11 changes: 9 additions & 2 deletions pkg/exporter/probe/nlqdisc/nlqdiscstats.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,14 @@ var (
Backlog = "backlog"
Overlimits = "overlimits"

qdiscMetrics = []string{Bytes, Packets, Drops, Qlen, Backlog, Overlimits}
qdiscMetrics = []probe.LegacyMetric{
{Name: Bytes, Help: "The total number of bytes transmitted through the queuing discipline."},
{Name: Packets, Help: "The total number of packets transmitted through the queuing discipline."},
{Name: Drops, Help: "The total number of packets dropped by the queuing discipline."},
{Name: Qlen, Help: "The current length of the queue (the number of packets queued)."},
{Name: Backlog, Help: "The total amount of data currently in the queue (in bytes)."},
{Name: Overlimits, Help: "The total number of packets that exceeded the configured limits."},
}
)

func init() {
Expand Down Expand Up @@ -80,7 +87,7 @@ func (p *Probe) Stop(_ context.Context) error {
func (p *Probe) CollectOnce() (map[string]map[uint32]uint64, error) {
resMap := make(map[string]map[uint32]uint64)
for _, metric := range qdiscMetrics {
resMap[metric] = make(map[uint32]uint64)
resMap[metric.Name] = make(map[uint32]uint64)
}

ets := nettop.GetAllUniqueNetnsEntity()
Expand Down
4 changes: 2 additions & 2 deletions pkg/exporter/probe/procfd/procfd.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ func fdProbeCreator() (probe.MetricsProbe, error) {
Subsystem: probeName,
VariableLabels: probe.StandardMetricsLabels,
SingleMetricsOpts: []probe.SingleMetricsOpts{
{Name: OpenFD, ValueType: prometheus.GaugeValue},
{Name: OpenSocket, ValueType: prometheus.GaugeValue},
{Name: OpenFD, Help: "The total number of open file descriptors for the process", ValueType: prometheus.GaugeValue},
{Name: OpenSocket, Help: "The total number of open sockets for the process", ValueType: prometheus.GaugeValue},
},
}
metrics := probe.NewBatchMetrics(opts, p.collectOnce)
Expand Down
8 changes: 4 additions & 4 deletions pkg/exporter/probe/procio/procio.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ func ioProbeCreator() (probe.MetricsProbe, error) {
Subsystem: probeName,
VariableLabels: probe.StandardMetricsLabels,
SingleMetricsOpts: []probe.SingleMetricsOpts{
{Name: IOReadSyscall, ValueType: prometheus.CounterValue},
{Name: IOWriteSyscall, ValueType: prometheus.CounterValue},
{Name: IOReadBytes, ValueType: prometheus.CounterValue},
{Name: IOWriteBytes, ValueType: prometheus.CounterValue},
{Name: IOReadSyscall, Help: "The total number of read system calls made by the process", ValueType: prometheus.CounterValue},
{Name: IOWriteSyscall, Help: "The total number of write system calls made by the process", ValueType: prometheus.CounterValue},
{Name: IOReadBytes, Help: "The total number of bytes read by the process", ValueType: prometheus.CounterValue},
{Name: IOWriteBytes, Help: "The total number of bytes written by the process", ValueType: prometheus.CounterValue},
},
}
metrics := probe.NewBatchMetrics(opts, p.collectOnce)
Expand Down
8 changes: 7 additions & 1 deletion pkg/exporter/probe/procipvs/ipvsservicestats.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,13 @@ var (
IncomingBytes = "incomingbytes"
OutgoingBytes = "outgoingbytes"

IPVSMetrics = []string{Connections, IncomingPackets, OutgoingBytes, IncomingBytes, OutgoingPackets}
IPVSMetrics = []probe.LegacyMetric{
{Name: Connections, Help: "The total number of connections handled by the IPVS (IP Virtual Server)"},
{Name: IncomingPackets, Help: "The total number of incoming packets processed by the IPVS"},
{Name: OutgoingBytes, Help: "The total number of bytes sent out by the IPVS"},
{Name: IncomingBytes, Help: "The total number of bytes received by the IPVS"},
{Name: OutgoingPackets, Help: "The total number of outgoing packets processed by the IPVS"},
}
)

func init() {
Expand Down
13 changes: 11 additions & 2 deletions pkg/exporter/probe/procnetdev/procnetdev.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,16 @@ const (
)

var (
NetdevMetrics = []string{RxBytes, RxErrors, TxBytes, TxErrors, RxPackets, RxDropped, TxPackets, TxDropped}
NetdevMetrics = []probe.LegacyMetric{
{Name: RxBytes, Help: "The total number of bytes received on the network interface."},
{Name: RxErrors, Help: "The total number of errors encountered while receiving on the network interface."},
{Name: TxBytes, Help: "The total number of bytes transmitted on the network interface."},
{Name: TxErrors, Help: "The total number of errors encountered while transmitting on the network interface."},
{Name: RxPackets, Help: "The total number of packets received on the network interface."},
{Name: RxDropped, Help: "The total number of received packets that were dropped on the network interface."},
{Name: TxPackets, Help: "The total number of packets transmitted on the network interface."},
{Name: TxDropped, Help: "The total number of transmitted packets that were dropped on the network interface."},
}
)

func init() {
Expand Down Expand Up @@ -60,7 +69,7 @@ func (s *ProcNetdev) CollectOnce() (map[string]map[uint32]uint64, error) {
func collect(nslist []*nettop.Entity) (map[string]map[uint32]uint64, error) {
resMap := make(map[string]map[uint32]uint64)
for _, m := range NetdevMetrics {
resMap[m] = make(map[uint32]uint64)
resMap[m.Name] = make(map[uint32]uint64)
}

netdev := getAllNetdev(nslist)
Expand Down
74 changes: 38 additions & 36 deletions pkg/exporter/probe/procnetstat/procnetstat.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,38 +60,40 @@ const (
)

var (
TCPExtMetrics = []string{TCPListenDrops,
TCPListenOverflows,
TCPSynRetrans,
TCPFastRetrans,
TCPRetransFail,
TCPTimeouts,
TCPAbortOnClose,
TCPAbortOnMemory,
TCPAbortOnTimeout,
TCPAbortOnLinger,
TCPAbortOnData,
TCPAbortFailed,
TCPACKSkippedSynRecv,
TCPACKSkippedPAWS,
TCPACKSkippedSeq,
TCPACKSkippedFinWait2,
TCPACKSkippedTimeWait,
TCPACKSkippedChallenge,
TCPRcvQDrop,
TCPMemoryPressures,
TCPMemoryPressuresChrono,
PAWSActive,
PAWSEstab,
EmbryonicRsts,
TCPWinProbe,
TCPKeepAlive,
TCPMTUPFail,
TCPMTUPSuccess,
TCPZeroWindowDrop,
TCPBacklogDrop,
PFMemallocDrop,
TCPWqueueTooBig}
// TCPExtMetrics lists the TcpExt counters exported by this probe together with
// their help text. PAWSActive/PAWSEstab are rejection counters (not state
// flags), TCPMemoryPressuresChrono is a duration in milliseconds, and
// PFMemallocDrop counts packets refused by non-SOCK_MEMALLOC sockets; the help
// strings below reflect the kernel's definitions of those counters.
TCPExtMetrics = []probe.LegacyMetric{
	{Name: TCPListenDrops, Help: "The total number of TCP connection requests that were dropped because the listen queue was full."},
	{Name: TCPListenOverflows, Help: "The total number of times the TCP listen queue has overflown."},
	{Name: TCPSynRetrans, Help: "The total number of SYN packets that were retransmitted."},
	{Name: TCPFastRetrans, Help: "The total number of fast retransmissions made by TCP."},
	{Name: TCPRetransFail, Help: "The total number of failed retransmissions in TCP."},
	{Name: TCPTimeouts, Help: "The total number of TCP timeouts."},
	{Name: TCPAbortOnClose, Help: "The number of TCP connections that were aborted on close."},
	{Name: TCPAbortOnMemory, Help: "The number of TCP connections that were aborted due to memory allocation failures."},
	{Name: TCPAbortOnTimeout, Help: "The number of TCP connections that were aborted due to timeouts."},
	{Name: TCPAbortOnLinger, Help: "The number of TCP connections that were aborted due to linger timeouts."},
	{Name: TCPAbortOnData, Help: "The number of TCP connections that were aborted due to data-related issues."},
	{Name: TCPAbortFailed, Help: "The number of attempts to abort TCP connections that failed."},
	{Name: TCPACKSkippedSynRecv, Help: "The number of ACKs skipped while in SYN_RECV state."},
	{Name: TCPACKSkippedPAWS, Help: "The number of ACKs skipped due to PAWS (Protection Against Wrapped Sequence numbers)."},
	{Name: TCPACKSkippedSeq, Help: "The number of ACKs skipped due to sequence number issues."},
	{Name: TCPACKSkippedFinWait2, Help: "The number of ACKs skipped while in FIN_WAIT_2 state."},
	{Name: TCPACKSkippedTimeWait, Help: "The number of ACKs skipped while in TIME_WAIT state."},
	{Name: TCPACKSkippedChallenge, Help: "The number of challenge ACKs (RFC 5961) that were not sent because of duplicate-ACK rate limiting."},
	{Name: TCPRcvQDrop, Help: "The total number of received packets that were dropped due to queue overflow."},
	{Name: TCPMemoryPressures, Help: "The total number of occasions where the TCP stack experienced memory pressure."},
	{Name: TCPMemoryPressuresChrono, Help: "The cumulative time, in milliseconds, that the TCP stack has spent under memory pressure."},
	{Name: PAWSActive, Help: "The number of packets rejected by PAWS (Protection Against Wrapped Sequence numbers) on connections in the SYN_SENT state."},
	{Name: PAWSEstab, Help: "The number of packets rejected by PAWS on connections in any state other than SYN_SENT."},
	{Name: EmbryonicRsts, Help: "The number of embryonic (half-open) connections that were reset."},
	{Name: TCPWinProbe, Help: "The total number of window probes sent to check for window size."},
	{Name: TCPKeepAlive, Help: "The total number of TCP keepalive packets sent."},
	{Name: TCPMTUPFail, Help: "The total number of MTU (Maximum Transmission Unit) probe failures."},
	{Name: TCPMTUPSuccess, Help: "The total number of successful MTU (Maximum Transmission Unit) discoveries."},
	{Name: TCPZeroWindowDrop, Help: "The total number of packets dropped due to a zero window condition."},
	{Name: TCPBacklogDrop, Help: "The total number of packets dropped from the TCP backlog queue."},
	{Name: PFMemallocDrop, Help: "The total number of packets allocated from emergency memory reserves (pfmemalloc) that were dropped because the receiving socket was not a SOCK_MEMALLOC socket."},
	{Name: TCPWqueueTooBig, Help: "The total number of TCP send queue drops due to the queue being too large."},
}
)

func init() {
Expand Down Expand Up @@ -129,7 +131,7 @@ func collect(nslist []*nettop.Entity) (map[string]map[uint32]uint64, error) {
resMap := make(map[string]map[uint32]uint64)

for _, stat := range TCPExtMetrics {
resMap[stat] = make(map[uint32]uint64)
resMap[stat.Name] = make(map[uint32]uint64)
}

for _, et := range nslist {
Expand All @@ -141,13 +143,13 @@ func collect(nslist []*nettop.Entity) (map[string]map[uint32]uint64, error) {

extstats := stats[ProtocolTCPExt]
for _, stat := range TCPExtMetrics {
if _, ok := extstats[stat]; ok {
data, err := strconv.ParseUint(extstats[stat], 10, 64)
if _, ok := extstats[stat.Name]; ok {
data, err := strconv.ParseUint(extstats[stat.Name], 10, 64)
if err != nil {
	// stat is a probe.LegacyMetric, so log only its Name; passing the whole
	// struct to %s would also dump the metric's help text into the log line.
	log.Errorf("%s failed parse stat %s, pid: %d err: %v", probeName, stat.Name, et.GetPid(), err)
	continue
}
resMap[stat][uint32(et.GetNetns())] += data
resMap[stat.Name][uint32(et.GetNetns())] += data
}
}
}
Expand Down
52 changes: 47 additions & 5 deletions pkg/exporter/probe/procsnmp/procsnmp.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,53 @@ const (
)

var (
TCPStatMetrcis = []string{TCPActiveOpens, TCPPassiveOpens, TCPRetransSegs, TCPAttemptFails, TCPEstabResets, TCPCurrEstab, TCPInSegs, TCPOutSegs, TCPInErrs, TCPOutRsts}
UDPStatMetrics = []string{UDPInDatagrams, UDPNoPorts, UDPInErrors, UDPOutDatagrams, UDPRcvbufErrors, UDPSndbufErrors, UDPInCsumErrors, UDPIgnoredMulti}
IPMetrics = []string{IPForwarding, IPDefaultTTL, IPInReceives, IPInHdrErrors, IPInAddrErrors, IPForwDatagrams, IPInUnknownProtos, IPInDiscards, IPInDelivers, IPOutRequests, IPOutDiscards, IPOutNoRoutes, IPReasmTimeout, IPReasmReqds, IPReasmOKs, IPReasmFails, IPFragOKs, IPFragFails, IPFragCreates}
TCPStatMetrcis = []probe.LegacyMetric{
{Name: TCPActiveOpens, Help: "The number of active TCP connections opened."},
{Name: TCPPassiveOpens, Help: "The number of passive TCP connections opened (i.e., connections established by accepting incoming connections)."},
{Name: TCPRetransSegs, Help: "The total number of segments that have been retransmitted."},
{Name: TCPAttemptFails, Help: "The number of failed attempts to establish a TCP connection."},
{Name: TCPEstabResets, Help: "The number of established TCP connections that were reset."},
{Name: TCPCurrEstab, Help: "The current number of established TCP connections."},
{Name: TCPInSegs, Help: "The total number of TCP segments received."},
{Name: TCPOutSegs, Help: "The total number of TCP segments sent."},
{Name: TCPInErrs, Help: "The total number of erroneous packets received on TCP."},
{Name: TCPOutRsts, Help: "The total number of TCP segments sent with the RST flag set."},
}

UDPStatMetrics = []probe.LegacyMetric{
{Name: UDPInDatagrams, Help: "The total number of UDP datagrams received."},
{Name: UDPNoPorts, Help: "The total number of UDP datagrams received for which there was no port at the destination."},
{Name: UDPInErrors, Help: "The total number of erroneous received UDP packets."},
{Name: UDPOutDatagrams, Help: "The total number of UDP datagrams sent."},
{Name: UDPRcvbufErrors, Help: "The total number of UDP datagrams dropped due to socket receive buffer errors."},
{Name: UDPSndbufErrors, Help: "The total number of UDP datagrams dropped due to socket send buffer errors."},
{Name: UDPInCsumErrors, Help: "The total number of UDP datagrams received with a checksum error."},
{Name: UDPIgnoredMulti, Help: "The total number of received UDP multicast packets that were ignored."},
}

// IPMetrics lists the Ip counters exported by this probe together with their
// help text.
// NOTE(review): the Forwarding help assumes the value is exported as read from
// the kernel's SNMP table (1 = forwarding, 2 = not forwarding) — confirm the
// collector does not remap it.
IPMetrics = []probe.LegacyMetric{
	{Name: IPForwarding, Help: "Indicates whether IP forwarding is enabled, using the SNMP ipForwarding encoding (1 for forwarding, 2 for not forwarding)."},
	{Name: IPDefaultTTL, Help: "The default time-to-live (TTL) value for IP packets."},
	{Name: IPInReceives, Help: "The total number of IP packets received."},
	{Name: IPInHdrErrors, Help: "The total number of received IP packets that had a header error."},
	{Name: IPInAddrErrors, Help: "The total number of received IP packets that were discarded due to address errors."},
	{Name: IPForwDatagrams, Help: "The total number of IP packets forwarded by this machine."},
	{Name: IPInUnknownProtos, Help: "The total number of received IP packets for which the protocol is not known."},
	{Name: IPInDiscards, Help: "The total number of received IP packets that were discarded."},
	{Name: IPInDelivers, Help: "The total number of delivered IP packets."},
	{Name: IPOutRequests, Help: "The total number of IP packets sent out."},
	{Name: IPOutDiscards, Help: "The total number of outgoing IP packets that were discarded."},
	{Name: IPOutNoRoutes, Help: "The total number of outgoing IP packets for which no route could be found."},
	{Name: IPReasmTimeout, Help: "The total number of times that IP reassembly timed out."},
	{Name: IPReasmReqds, Help: "The total number of IP reassembly requests made."},
	{Name: IPReasmOKs, Help: "The total number of successful IP reassembly operations."},
	{Name: IPReasmFails, Help: "The total number of failed IP reassembly operations."},
	{Name: IPFragOKs, Help: "The total number of IP packets that were fragmented successfully."},
	{Name: IPFragFails, Help: "The total number of IP packets that failed to fragment."},
	{Name: IPFragCreates, Help: "The total number of IP fragments created."},
}

metricsMap = map[string][]string{
metricsMap = map[string][]probe.LegacyMetric{
TCP: TCPStatMetrcis,
UDP: UDPStatMetrics,
IP: IPMetrics,
Expand Down Expand Up @@ -165,7 +207,7 @@ func collect() (map[string]map[string]map[uint32]uint64, error) {
for proto, metricsList := range metricsMap {
res[proto] = make(map[string]map[uint32]uint64)
for _, metrics := range metricsList {
res[proto][metrics] = make(map[uint32]uint64)
res[proto][metrics.Name] = make(map[uint32]uint64)
}
}

Expand Down
13 changes: 10 additions & 3 deletions pkg/exporter/probe/procsock/procsock.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,15 @@ const (
)

var (
TCPSockStatMetrics = []string{TCPSockInuse, TCPSockOrphan, TCPSockTimewait, TCPSockeAlloc, TCPSockeMem}
probeName = "sock"
TCPSockStatMetrics = []probe.LegacyMetric{
{Name: TCPSockInuse, Help: "The total number of TCP sockets currently in use."},
{Name: TCPSockOrphan, Help: "The total number of orphaned TCP sockets."},
{Name: TCPSockTimewait, Help: "The total number of TCP sockets in the TIME_WAIT state."},
{Name: TCPSockeAlloc, Help: "The total number of TCP sockets allocated."},
{Name: TCPSockeMem, Help: "The total amount of memory allocated for TCP sockets."},
}

probeName = "sock"
)

func init() {
Expand Down Expand Up @@ -70,7 +77,7 @@ type tcpsockstat struct {
func collect() (resMap map[string]map[uint32]uint64, err error) {
resMap = make(map[string]map[uint32]uint64)
for _, stat := range TCPSockStatMetrics {
resMap[stat] = map[uint32]uint64{}
resMap[stat.Name] = map[uint32]uint64{}
}

// for _, nslogic := range nslist {
Expand Down
9 changes: 6 additions & 3 deletions pkg/exporter/probe/procsoftnet/procsoftnet.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ const (
)

var (
softnetMetrics = []string{SNProcessed, SNDropped}
softnetMetrics = []probe.LegacyMetric{
{Name: SNProcessed, Help: "The total number of packets processed by the softnet layer"},
{Name: SNDropped, Help: "The total number of packets dropped by the softnet layer"},
}
)

func init() {
Expand Down Expand Up @@ -63,7 +66,7 @@ func collect(nslist []*nettop.Entity) (map[string]map[uint32]uint64, error) {
resMap := make(map[string]map[uint32]uint64)

for _, m := range softnetMetrics {
resMap[m] = map[uint32]uint64{}
resMap[m.Name] = map[uint32]uint64{}
}

for _, ns := range nslist {
Expand All @@ -72,7 +75,7 @@ func collect(nslist []*nettop.Entity) (map[string]map[uint32]uint64, error) {
continue
}
for _, m := range softnetMetrics {
resMap[m][uint32(ns.GetNetns())] = stat[m]
resMap[m.Name][uint32(ns.GetNetns())] = stat[m.Name]
}
}
return resMap, nil
Expand Down
Loading
Loading