Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add hostname label to exporter metrics #20

Merged
merged 2 commits into from
Dec 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions pkg/collector/cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ var BatchJobExporterApp = *kingpin.New(
"Prometheus Exporter to export batch job metrics.",
)

// emptyHostnameLabel points at the value of the hidden
// "collector.empty.hostname.label" flag, which is registered in
// BatchJobExporter.Main with a default of "false". When it is true,
// collector constructors such as NewIPMICollector skip os.Hostname() and
// leave the hostname label empty. Used only for testing.
//
// NOTE(review): the pointer stays nil until Main registers the flag, so a
// constructor that dereferences it earlier would panic — confirm call order.
var emptyHostnameLabel *bool

// Create a new BatchJobExporter struct
func NewBatchJobExporter() (*BatchJobExporter, error) {
return &BatchJobExporter{
Expand Down Expand Up @@ -91,6 +94,12 @@ func (b *BatchJobExporter) Main() {
toolkitFlags = kingpinflag.AddFlags(&b.App, ":9010")
)

// This is a hidden flag that is used only for e2e testing.
emptyHostnameLabel = b.App.Flag(
"collector.empty.hostname.label",
"Use empty hostname in labels. Only for testing. (default is disabled)",
).Hidden().Default("false").Bool()

promlogConfig := &promlog.Config{}
flag.AddFlags(&b.App, promlogConfig)
b.App.Version(version.Print(b.appName))
Expand Down
34 changes: 17 additions & 17 deletions pkg/collector/fixtures/e2e-test-cgroupsv1-output.txt
Original file line number Diff line number Diff line change
@@ -1,52 +1,52 @@
# HELP batchjob_cpu_system_seconds Cumulative CPU system seconds
# TYPE batchjob_cpu_system_seconds gauge
batchjob_cpu_system_seconds{batch="slurm",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 0.45
batchjob_cpu_system_seconds{batch="slurm",hostname="",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 0.45
# HELP batchjob_cpu_total_seconds Cumulative CPU total seconds
# TYPE batchjob_cpu_total_seconds gauge
batchjob_cpu_total_seconds{batch="slurm",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 1.012410966
batchjob_cpu_total_seconds{batch="slurm",hostname="",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 1.012410966
# HELP batchjob_cpu_user_seconds Cumulative CPU user seconds
# TYPE batchjob_cpu_user_seconds gauge
batchjob_cpu_user_seconds{batch="slurm",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 0.39
batchjob_cpu_user_seconds{batch="slurm",hostname="",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 0.39
# HELP batchjob_cpus Number of CPUs
# TYPE batchjob_cpus gauge
batchjob_cpus{batch="slurm",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 0
batchjob_cpus{batch="slurm",hostname="",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 0
# HELP batchjob_exporter_build_info A metric with a constant '1' value labeled by version, revision, branch, goversion from which batchjob_exporter was built, and the goos and goarch for the build.
# TYPE batchjob_exporter_build_info gauge
# HELP batchjob_ipmi_dcmi_watts_total Current Power consumption in watts
# TYPE batchjob_ipmi_dcmi_watts_total counter
batchjob_ipmi_dcmi_watts_total 332
batchjob_ipmi_dcmi_watts_total{hostname=""} 332
# HELP batchjob_memory_cache_bytes Memory cache used in bytes
# TYPE batchjob_memory_cache_bytes gauge
batchjob_memory_cache_bytes{batch="slurm",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 2.1086208e+07
batchjob_memory_cache_bytes{batch="slurm",hostname="",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 2.1086208e+07
# HELP batchjob_memory_fail_count Memory fail count
# TYPE batchjob_memory_fail_count gauge
batchjob_memory_fail_count{batch="slurm",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 0
batchjob_memory_fail_count{batch="slurm",hostname="",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 0
# HELP batchjob_memory_rss_bytes Memory RSS used in bytes
# TYPE batchjob_memory_rss_bytes gauge
batchjob_memory_rss_bytes{batch="slurm",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 1.0407936e+07
batchjob_memory_rss_bytes{batch="slurm",hostname="",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 1.0407936e+07
# HELP batchjob_memory_total_bytes Memory total in bytes
# TYPE batchjob_memory_total_bytes gauge
batchjob_memory_total_bytes{batch="slurm",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 2.01362030592e+11
batchjob_memory_total_bytes{batch="slurm",hostname="",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 2.01362030592e+11
# HELP batchjob_memory_used_bytes Memory used in bytes
# TYPE batchjob_memory_used_bytes gauge
batchjob_memory_used_bytes{batch="slurm",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 4.0194048e+07
batchjob_memory_used_bytes{batch="slurm",hostname="",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 4.0194048e+07
# HELP batchjob_memsw_fail_count Swap fail count
# TYPE batchjob_memsw_fail_count gauge
batchjob_memsw_fail_count{batch="slurm",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 0
batchjob_memsw_fail_count{batch="slurm",hostname="",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 0
# HELP batchjob_memsw_total_bytes Swap total in bytes
# TYPE batchjob_memsw_total_bytes gauge
batchjob_memsw_total_bytes{batch="slurm",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 9.223372036854772e+18
batchjob_memsw_total_bytes{batch="slurm",hostname="",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 9.223372036854772e+18
# HELP batchjob_memsw_used_bytes Swap used in bytes
# TYPE batchjob_memsw_used_bytes gauge
batchjob_memsw_used_bytes{batch="slurm",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 4.032512e+07
batchjob_memsw_used_bytes{batch="slurm",hostname="",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 4.032512e+07
# HELP batchjob_nvidia_gpu_jobid Batch Job ID of current nVIDIA GPU
# TYPE batchjob_nvidia_gpu_jobid gauge
batchjob_nvidia_gpu_jobid{uuid="GPU-61a65011-6571-a6d2-5ab8-66cbb6f7f9c3"} 11000
batchjob_nvidia_gpu_jobid{uuid="GPU-f124aa59-d406-d45b-9481-8fcd694e6c9e"} 10000
batchjob_nvidia_gpu_jobid{UUID="GPU-61a65011-6571-a6d2-5ab8-66cbb6f7f9c3",hostname="",uuid="GPU-61a65011-6571-a6d2-5ab8-66cbb6f7f9c3"} 11000
batchjob_nvidia_gpu_jobid{UUID="GPU-f124aa59-d406-d45b-9481-8fcd694e6c9e",hostname="",uuid="GPU-f124aa59-d406-d45b-9481-8fcd694e6c9e"} 10000
# HELP batchjob_rapl_package_joules_total Current RAPL package value in joules
# TYPE batchjob_rapl_package_joules_total counter
batchjob_rapl_package_joules_total{index="0",path="pkg/collector/fixtures/sys/class/powercap/intel-rapl:0"} 258218.293244
batchjob_rapl_package_joules_total{index="1",path="pkg/collector/fixtures/sys/class/powercap/intel-rapl:1"} 130570.505826
batchjob_rapl_package_joules_total{hostname="",index="0",path="pkg/collector/fixtures/sys/class/powercap/intel-rapl:0"} 258218.293244
batchjob_rapl_package_joules_total{hostname="",index="1",path="pkg/collector/fixtures/sys/class/powercap/intel-rapl:1"} 130570.505826
# HELP batchjob_scrape_collector_duration_seconds batchjob_exporter: Duration of a collector scrape.
# TYPE batchjob_scrape_collector_duration_seconds gauge
# HELP batchjob_scrape_collector_success batchjob_exporter: Whether a collector succeeded.
Expand Down
34 changes: 17 additions & 17 deletions pkg/collector/fixtures/e2e-test-cgroupsv2-output.txt
Original file line number Diff line number Diff line change
@@ -1,52 +1,52 @@
# HELP batchjob_cpu_system_seconds Cumulative CPU system seconds
# TYPE batchjob_cpu_system_seconds gauge
batchjob_cpu_system_seconds{batch="slurm",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 115.777502
batchjob_cpu_system_seconds{batch="slurm",hostname="",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 115.777502
# HELP batchjob_cpu_total_seconds Cumulative CPU total seconds
# TYPE batchjob_cpu_total_seconds gauge
batchjob_cpu_total_seconds{batch="slurm",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 60491.070351
batchjob_cpu_total_seconds{batch="slurm",hostname="",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 60491.070351
# HELP batchjob_cpu_user_seconds Cumulative CPU user seconds
# TYPE batchjob_cpu_user_seconds gauge
batchjob_cpu_user_seconds{batch="slurm",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 60375.292848
batchjob_cpu_user_seconds{batch="slurm",hostname="",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 60375.292848
# HELP batchjob_cpus Number of CPUs
# TYPE batchjob_cpus gauge
batchjob_cpus{batch="slurm",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 2
batchjob_cpus{batch="slurm",hostname="",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 2
# HELP batchjob_exporter_build_info A metric with a constant '1' value labeled by version, revision, branch, goversion from which batchjob_exporter was built, and the goos and goarch for the build.
# TYPE batchjob_exporter_build_info gauge
# HELP batchjob_ipmi_dcmi_watts_total Current Power consumption in watts
# TYPE batchjob_ipmi_dcmi_watts_total counter
batchjob_ipmi_dcmi_watts_total 332
batchjob_ipmi_dcmi_watts_total{hostname=""} 332
# HELP batchjob_memory_cache_bytes Memory cache used in bytes
# TYPE batchjob_memory_cache_bytes gauge
batchjob_memory_cache_bytes{batch="slurm",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 0
batchjob_memory_cache_bytes{batch="slurm",hostname="",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 0
# HELP batchjob_memory_fail_count Memory fail count
# TYPE batchjob_memory_fail_count gauge
batchjob_memory_fail_count{batch="slurm",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 0
batchjob_memory_fail_count{batch="slurm",hostname="",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 0
# HELP batchjob_memory_rss_bytes Memory RSS used in bytes
# TYPE batchjob_memory_rss_bytes gauge
batchjob_memory_rss_bytes{batch="slurm",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 4.098592768e+09
batchjob_memory_rss_bytes{batch="slurm",hostname="",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 4.098592768e+09
# HELP batchjob_memory_total_bytes Memory total in bytes
# TYPE batchjob_memory_total_bytes gauge
batchjob_memory_total_bytes{batch="slurm",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 4.294967296e+09
batchjob_memory_total_bytes{batch="slurm",hostname="",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 4.294967296e+09
# HELP batchjob_memory_used_bytes Memory used in bytes
# TYPE batchjob_memory_used_bytes gauge
batchjob_memory_used_bytes{batch="slurm",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 4.111491072e+09
batchjob_memory_used_bytes{batch="slurm",hostname="",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 4.111491072e+09
# HELP batchjob_memsw_fail_count Swap fail count
# TYPE batchjob_memsw_fail_count gauge
batchjob_memsw_fail_count{batch="slurm",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 0
batchjob_memsw_fail_count{batch="slurm",hostname="",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 0
# HELP batchjob_memsw_total_bytes Swap total in bytes
# TYPE batchjob_memsw_total_bytes gauge
batchjob_memsw_total_bytes{batch="slurm",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} -1
batchjob_memsw_total_bytes{batch="slurm",hostname="",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} -1
# HELP batchjob_memsw_used_bytes Swap used in bytes
# TYPE batchjob_memsw_used_bytes gauge
batchjob_memsw_used_bytes{batch="slurm",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 0
batchjob_memsw_used_bytes{batch="slurm",hostname="",jobaccount="testacc",jobid="1009248",jobuuid="ac28caf5-ce6c-35f6-73fb-47d9d43f7780",step="",task=""} 0
# HELP batchjob_nvidia_gpu_jobid Batch Job ID of current nVIDIA GPU
# TYPE batchjob_nvidia_gpu_jobid gauge
batchjob_nvidia_gpu_jobid{uuid="GPU-61a65011-6571-a6d2-5ab8-66cbb6f7f9c3"} 11000
batchjob_nvidia_gpu_jobid{uuid="GPU-f124aa59-d406-d45b-9481-8fcd694e6c9e"} 10000
batchjob_nvidia_gpu_jobid{UUID="GPU-61a65011-6571-a6d2-5ab8-66cbb6f7f9c3",hostname="",uuid="GPU-61a65011-6571-a6d2-5ab8-66cbb6f7f9c3"} 11000
batchjob_nvidia_gpu_jobid{UUID="GPU-f124aa59-d406-d45b-9481-8fcd694e6c9e",hostname="",uuid="GPU-f124aa59-d406-d45b-9481-8fcd694e6c9e"} 10000
# HELP batchjob_rapl_package_joules_total Current RAPL package value in joules
# TYPE batchjob_rapl_package_joules_total counter
batchjob_rapl_package_joules_total{index="0",path="pkg/collector/fixtures/sys/class/powercap/intel-rapl:0"} 258218.293244
batchjob_rapl_package_joules_total{index="1",path="pkg/collector/fixtures/sys/class/powercap/intel-rapl:1"} 130570.505826
batchjob_rapl_package_joules_total{hostname="",index="0",path="pkg/collector/fixtures/sys/class/powercap/intel-rapl:0"} 258218.293244
batchjob_rapl_package_joules_total{hostname="",index="1",path="pkg/collector/fixtures/sys/class/powercap/intel-rapl:1"} 130570.505826
# HELP batchjob_scrape_collector_duration_seconds batchjob_exporter: Duration of a collector scrape.
# TYPE batchjob_scrape_collector_duration_seconds gauge
# HELP batchjob_scrape_collector_success batchjob_exporter: Whether a collector succeeded.
Expand Down
20 changes: 16 additions & 4 deletions pkg/collector/ipmi.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package collector

import (
"fmt"
"os"
"regexp"
"strconv"
"strings"
Expand All @@ -22,6 +23,7 @@ const ipmiCollectorSubsystem = "ipmi_dcmi"

// impiCollector holds the state of the IPMI DCMI power collector that
// NewIPMICollector builds. (The type name is spelled "impi" in the source
// and is referenced under that spelling by the constructor and Update.)
type impiCollector struct {
// logger is the logger passed to NewIPMICollector.
logger log.Logger
// hostname is the value emitted for the "hostname" label of the watts
// metric. It is left empty when the hidden empty-hostname test flag is set.
hostname string
// execMode is chosen in NewIPMICollector.
// NOTE(review): presumably selects how the IPMI DCMI command is run; the
// assignment logic is not visible here — confirm.
execMode string
// wattsMetricDesc describes the "watts_total" metric of the ipmi_dcmi
// subsystem.
wattsMetricDesc *prometheus.Desc
}
Expand All @@ -45,14 +47,23 @@ func init() {

// NewIPMICollector returns a new Collector exposing IPMI DCMI power metrics.
func NewIPMICollector(logger log.Logger) (Collector, error) {
var execMode string
var hostname string
var err error

// Get hostname
if !*emptyHostnameLabel {
hostname, err = os.Hostname()
if err != nil {
level.Error(logger).Log("msg", "Failed to get hostname", "err", err)
}
}

wattsMetricDesc := prometheus.NewDesc(
prometheus.BuildFQName(Namespace, ipmiCollectorSubsystem, "watts_total"),
"Current Power consumption in watts", []string{}, nil,
"Current Power consumption in watts", []string{"hostname"}, nil,
)

var execMode string

// Split command
cmdSlice := strings.Split(*ipmiDcmiCmd, " ")

Expand Down Expand Up @@ -81,6 +92,7 @@ func NewIPMICollector(logger log.Logger) (Collector, error) {
outside:
collector := impiCollector{
logger: logger,
hostname: hostname,
execMode: execMode,
wattsMetricDesc: wattsMetricDesc,
}
Expand Down Expand Up @@ -134,7 +146,7 @@ func (c *impiCollector) Update(ch chan<- prometheus.Metric) error {

// A negative reading means the power measurement is not available, so only
// values greater than -1 are exported.
if currentPowerConsumption > -1 {
ch <- prometheus.MustNewConstMetric(c.wattsMetricDesc, prometheus.CounterValue, float64(currentPowerConsumption))
ch <- prometheus.MustNewConstMetric(c.wattsMetricDesc, prometheus.CounterValue, float64(currentPowerConsumption), c.hostname)
}
return nil
}
Expand Down
17 changes: 15 additions & 2 deletions pkg/collector/nvidia_gpus.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ type Device struct {
// nvidiaGpuJobMapCollector holds the state of the collector that maps
// nVIDIA GPUs to batch job IDs, as built by NewNvidiaGpuJobMapCollector.
type nvidiaGpuJobMapCollector struct {
// devices is the device list returned by getAllDevices when the collector
// was constructed; Update emits one metric per entry.
devices []Device
// logger is the logger passed to NewNvidiaGpuJobMapCollector.
logger log.Logger
// hostname is the value emitted for the "hostname" label. It is left
// empty when the hidden empty-hostname test flag is set.
hostname string
// gpuJobMapDesc describes the "jobid" metric of this collector's
// subsystem.
gpuJobMapDesc *prometheus.Desc
}

Expand Down Expand Up @@ -116,16 +117,28 @@ func getAllDevices(logger log.Logger) ([]Device, error) {

// NewNvidiaGpuJobMapCollector returns a new Collector exposing batch jobs to nVIDIA GPU ordinals mapping.
func NewNvidiaGpuJobMapCollector(logger log.Logger) (Collector, error) {
var hostname string
var err error

// Get hostname
if !*emptyHostnameLabel {
hostname, err = os.Hostname()
if err != nil {
level.Error(logger).Log("msg", "Failed to get hostname", "err", err)
}
}

allDevices, _ := getAllDevices(logger)
gpuJobMapDesc := prometheus.NewDesc(
prometheus.BuildFQName(Namespace, nvidiaGpuJobMapCollectorSubsystem, "jobid"),
"Batch Job ID of current nVIDIA GPU",
[]string{"uuid"}, nil,
[]string{"hostname", "uuid", "UUID"}, nil,
)

collector := nvidiaGpuJobMapCollector{
devices: allDevices,
logger: logger,
hostname: hostname,
gpuJobMapDesc: gpuJobMapDesc,
}
return &collector, nil
Expand All @@ -135,7 +148,7 @@ func NewNvidiaGpuJobMapCollector(logger log.Logger) (Collector, error) {
func (c *nvidiaGpuJobMapCollector) Update(ch chan<- prometheus.Metric) error {
gpuJobMapper, _ := c.getJobId()
for _, dev := range c.devices {
ch <- prometheus.MustNewConstMetric(c.gpuJobMapDesc, prometheus.GaugeValue, gpuJobMapper[dev.uuid], dev.uuid)
ch <- prometheus.MustNewConstMetric(c.gpuJobMapDesc, prometheus.GaugeValue, gpuJobMapper[dev.uuid], c.hostname, dev.uuid, dev.uuid)
}
return nil
}
Expand Down
27 changes: 19 additions & 8 deletions pkg/collector/rapl.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ import (
const raplCollectorSubsystem = "rapl"

type raplCollector struct {
fs sysfs.FS
logger log.Logger

fs sysfs.FS
logger log.Logger
hostname string
joulesMetricDesc *prometheus.Desc
}

Expand All @@ -39,21 +39,32 @@ var (

// NewRaplCollector returns a new Collector exposing RAPL metrics.
func NewRaplCollector(logger log.Logger) (Collector, error) {
fs, err := sysfs.NewFS(*sysPath)
var hostname string
var err error

// Get hostname
if !*emptyHostnameLabel {
hostname, err = os.Hostname()
if err != nil {
level.Error(logger).Log("msg", "Failed to get hostname", "err", err)
}
}

fs, err := sysfs.NewFS(*sysPath)
if err != nil {
return nil, err
}

joulesMetricDesc := prometheus.NewDesc(
prometheus.BuildFQName(Namespace, raplCollectorSubsystem, "joules_total"),
"Current RAPL value in joules",
[]string{"index", "path", "rapl_zone"}, nil,
[]string{"hostname", "index", "path", "rapl_zone"}, nil,
)

collector := raplCollector{
fs: fs,
logger: logger,
hostname: hostname,
joulesMetricDesc: joulesMetricDesc,
}
return &collector, nil
Expand Down Expand Up @@ -107,25 +118,25 @@ func (c *raplCollector) joulesMetric(z sysfs.RaplZone, v float64) prometheus.Met
fmt.Sprintf("%s_joules_total", SanitizeMetricName(z.Name)),
),
fmt.Sprintf("Current RAPL %s value in joules", z.Name),
[]string{"index", "path"}, nil,
[]string{"hostname", "index", "path"}, nil,
)

return prometheus.MustNewConstMetric(
descriptor,
prometheus.CounterValue,
v,
c.hostname,
index,
z.Path,
)
}

func (c *raplCollector) joulesMetricWithZoneLabel(z sysfs.RaplZone, v float64) prometheus.Metric {
index := strconv.Itoa(z.Index)

return prometheus.MustNewConstMetric(
c.joulesMetricDesc,
prometheus.CounterValue,
v,
c.hostname,
index,
z.Path,
z.Name,
Expand Down
Loading