Skip to content

Commit

Permalink
Adding Fleet Active GameServerSet Percentage Metrics (#4021)
Browse files Browse the repository at this point in the history
* Add metric for agones_fleets_active_gameserverset_percentage calculation

* lint fixed

---------

Co-authored-by: Mengye (Max) Gong <[email protected]>
Co-authored-by: Vicente Ferrara <[email protected]>
  • Loading branch information
3 people authored Nov 18, 2024
1 parent 5083d8d commit 244f501
Show file tree
Hide file tree
Showing 3 changed files with 188 additions and 7 deletions.
129 changes: 123 additions & 6 deletions build/grafana/dashboard-gameservers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,89 @@ data:
"links": [],
"panels": [
{
"gridPos": {
"h": 6,
"w": 5,
"x": 0,
"y": 0
},
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"targets": [
{
"expr": "sum(agones_fleet_rollout_percent{name=~\"$fleet\", namespace=~\"$namespace\", type=\"current_replicas\"}) / sum(agones_fleet_rollout_percent{name=~\"$fleet\", namespace=~\"$namespace\", type=\"desired_replicas\"}) * 100",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{type}}",
"refId": "A"
}
],
"options": {
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": true
},
"orientation": "horizontal",
"textMode": "value",
"colorMode": "value",
"valueMappings": []
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {},
"thresholds": {
"mode": "percentage",
"steps": [
{
"color": "dark-red",
"value": null
},
{
"color": "dark-orange",
"value": 20
},
{
"color": "dark-green",
"value": 50
}
]
},
"unit": "percent",
"links": []
},
"overrides": []
},
"legend": {
"show": false
},
"timeShift": null,
"nullPointMode": "null",
"options": {
"showThresholdLabels": false,
"showThresholdMarkers": true
},
"tooltip": {
"shared": false,
"value_type": "individual"
},
"type": "stat",
"title": "Fleet RollOut Percentage"
},
{
"aliasColors": {},
"breakPoint": "50%",
"cacheTimeout": null,
"combine": {
"label": "Others",
"threshold": 0
},
"fieldConfig": {
"defaults": {
"color": {
Expand All @@ -72,8 +151,8 @@ data:
},
"gridPos": {
"h": 6,
"w": 7,
"x": 0,
"w": 5,
"x": 5,
"y": 0
},
"id": 4,
Expand Down Expand Up @@ -224,8 +303,8 @@ data:
},
"gridPos": {
"h": 6,
"w": 17,
"x": 7,
"w": 14,
"x": 10,
"y": 0
},
"id": 2,
Expand Down Expand Up @@ -401,8 +480,46 @@ data:
"refId": "A"
}
],
"title": "GameServers count per type",
"type": "timeseries"
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "GameServer count overview",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"datasource": {
Expand Down
55 changes: 55 additions & 0 deletions pkg/metrics/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,15 @@ import (
"agones.dev/agones/pkg/client/informers/externalversions"
listerv1 "agones.dev/agones/pkg/client/listers/agones/v1"
autoscalinglisterv1 "agones.dev/agones/pkg/client/listers/autoscaling/v1"
fleetsv1 "agones.dev/agones/pkg/fleets"
"agones.dev/agones/pkg/util/runtime"
lru "github.com/hashicorp/golang-lru"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"go.opencensus.io/stats"
"go.opencensus.io/tag"
corev1 "k8s.io/api/core/v1"
apiequality "k8s.io/apimachinery/pkg/api/equality"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apimachinery/pkg/util/wait"
Expand Down Expand Up @@ -77,6 +79,7 @@ type Controller struct {
gameServerSynced cache.InformerSynced
fleetSynced cache.InformerSynced
fleetLister listerv1.FleetLister
gameServerSetLister listerv1.GameServerSetLister
fasSynced cache.InformerSynced
fasLister autoscalinglisterv1.FleetAutoscalerLister
lock sync.Mutex
Expand All @@ -103,6 +106,8 @@ func NewController(
fasInformer := fas.Informer()
node := kubeInformerFactory.Core().V1().Nodes()

gameServerSets := agonesInformerFactory.Agones().V1().GameServerSets()

// GameServerStateLastChange contains the time at which each GameServer
// last changed its state.
// The GameServerName key is removed on delete and on state change.
Expand All @@ -117,6 +122,7 @@ func NewController(
gameServerSynced: gsInformer.HasSynced,
fleetSynced: fInformer.HasSynced,
fleetLister: fleets.Lister(),
gameServerSetLister: gameServerSets.Lister(),
fasSynced: fasInformer.HasSynced,
fasLister: fas.Lister(),
gsCount: GameServerCount{},
Expand Down Expand Up @@ -240,6 +246,8 @@ func (c *Controller) recordFleetChanges(obj interface{}) {
c.recordFleetReplicas(f.Name, f.Namespace, f.Status.Replicas, f.Status.AllocatedReplicas,
f.Status.ReadyReplicas, f.Spec.Replicas, f.Status.ReservedReplicas)

c.recordFleetRolloutPercentage(f)

if runtime.FeatureEnabled(runtime.FeatureCountsAndLists) {
if f.Status.Counters != nil {
c.recordCounters(f.Name, f.Namespace, f.Status.Counters)
Expand All @@ -250,6 +258,53 @@ func (c *Controller) recordFleetChanges(obj interface{}) {
}
}

// recordFleetRolloutPercentage records the two values from which a Fleet's
// rollout progress is derived: the replica count reported in the status of the
// Fleet's active GameServerSet (tagged type="current_replicas") and the replica
// count requested in the Fleet spec (tagged type="desired_replicas"). Both are
// written to fleetRolloutPercentStats and tagged with the Fleet's name and
// namespace.
//
// If the GameServerSets cannot be listed, or the tag context cannot be built,
// the problem is logged and nothing is recorded.
func (c *Controller) recordFleetRolloutPercentage(fleet *agonesv1.Fleet) {
	list, err := fleetsv1.ListGameServerSetsByFleetOwner(c.gameServerSetLister, fleet)
	if err != nil {
		c.logger.Errorf("Error listing GameServerSets for fleet %s in namespace %s: %v", fleet.Name, fleet.Namespace, err)
		return
	}

	// Only the active GameServerSet is needed here; the remaining sets are ignored.
	active, _ := c.filterGameServerSetByActive(fleet, list)
	if active == nil {
		c.logger.Debugf("Could not find active GameServerSet for fleet %s/%s", fleet.Namespace, fleet.Name)
		// NOTE(review): presumably fleet.GameServerSet() builds a fresh, not yet
		// persisted GameServerSet, so its status replica count would be zero —
		// confirm against the Fleet type.
		active = fleet.GameServerSet()
	}

	currentReplicas := active.Status.Replicas
	desiredReplicas := fleet.Spec.Replicas

	// Do not record with a context that is missing the name/namespace tags:
	// that would attribute the values to an unlabelled series.
	ctx, err := tag.New(context.Background(), tag.Upsert(keyName, fleet.Name), tag.Upsert(keyNamespace, fleet.GetNamespace()))
	if err != nil {
		c.logger.Errorf("Error creating metric tags for fleet %s in namespace %s: %v", fleet.Name, fleet.Namespace, err)
		return
	}

	// Record current replicas count
	RecordWithTags(ctx, []tag.Mutator{tag.Upsert(keyType, "current_replicas")},
		fleetRolloutPercentStats.M(int64(currentReplicas)))

	// Record desired replicas count
	RecordWithTags(ctx, []tag.Mutator{tag.Upsert(keyType, "desired_replicas")},
		fleetRolloutPercentStats.M(int64(desiredReplicas)))
}

// filterGameServerSetByActive partitions the GameServerSets in list into the
// active one and all others. A GameServerSet counts as active when its
// Spec.Template is semantically equal to the Fleet's Spec.Template.
//
// The first return value is the active GameServerSet, or nil when none
// matches. The second holds every non-matching GameServerSet in list order.
// If more than one entry matches, the last match is returned and earlier
// matches appear in neither return value.
func (c *Controller) filterGameServerSetByActive(fleet *agonesv1.Fleet, list []*agonesv1.GameServerSet) (*agonesv1.GameServerSet, []*agonesv1.GameServerSet) {
	var (
		current *agonesv1.GameServerSet
		others  []*agonesv1.GameServerSet
	)

	for i := range list {
		candidate := list[i]
		if !apiequality.Semantic.DeepEqual(candidate.Spec.Template, fleet.Spec.Template) {
			others = append(others, candidate)
			continue
		}
		current = candidate
	}

	return current, others
}

func (c *Controller) recordFleetDeletion(obj interface{}) {
_, ok := obj.(*agonesv1.Fleet)
if !ok {
Expand Down
11 changes: 10 additions & 1 deletion pkg/metrics/controller_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
)

const (
fleetRolloutPercent = "fleet_rollout_percent"
fleetReplicaCountName = "fleets_replicas_count"
fleetAutoscalerBufferLimitName = "fleet_autoscalers_buffer_limits"
fleetAutoscalterBufferSizeName = "fleet_autoscalers_buffer_size"
Expand All @@ -44,9 +45,10 @@ var (
fleetAutoscalerViews = []string{fleetAutoscalerBufferLimitName, fleetAutoscalterBufferSizeName, fleetAutoscalerCurrentReplicaCountName,
fleetAutoscalersDesiredReplicaCountName, fleetAutoscalersAbleToScaleName, fleetAutoscalersLimitedName}
// fleetViews are metric views associated with Fleets
fleetViews = append([]string{fleetReplicaCountName, gameServersCountName, gameServersTotalName, gameServersPlayerConnectedTotalName, gameServersPlayerCapacityTotalName, gameServerStateDurationName, fleetCountersName, fleetListsName}, fleetAutoscalerViews...)
fleetViews = append([]string{fleetRolloutPercent, fleetReplicaCountName, gameServersCountName, gameServersTotalName, gameServersPlayerConnectedTotalName, gameServersPlayerCapacityTotalName, gameServerStateDurationName, fleetCountersName, fleetListsName}, fleetAutoscalerViews...)

stateDurationSeconds = []float64{0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384}
fleetRolloutPercentStats = stats.Int64("fleets/rollout_percent", "The current fleet rollout percentage", "1")
fleetsReplicasCountStats = stats.Int64("fleets/replicas_count", "The count of replicas per fleet", "1")
fasBufferLimitsCountStats = stats.Int64("fas/buffer_limits", "The buffer limits of autoscalers", "1")
fasBufferSizeStats = stats.Int64("fas/buffer_size", "The buffer size value of autoscalers", "1")
Expand All @@ -65,6 +67,13 @@ var (
gsStateDurationSec = stats.Float64("gameservers_state/duration", "The duration of gameservers to be in a particular state", stats.UnitSeconds)

stateViews = []*view.View{
{
Name: fleetRolloutPercent,
Measure: fleetRolloutPercentStats,
Description: "Measures the current progress of fleet rollout",
Aggregation: view.LastValue(),
TagKeys: []tag.Key{keyName, keyType, keyNamespace},
},
{
Name: fleetReplicaCountName,
Measure: fleetsReplicasCountStats,
Expand Down

0 comments on commit 244f501

Please sign in to comment.