# Review notes for this patch. This is a free-text preamble placed ahead of the
# first "diff --git" header; the patch text below is unchanged.
#
# What the hunks below do, file by file:
#   pjs/common/health-check.js
#     - Both fgwUpstreamStatus.withLabels(...) calls gain three arguments after
#       target.port: a literal 'ok' / 'fail', the target's reason, and
#       target.http_status || ''.
#     - The failure path changes from .set(0) to .decrease().
#     - check() stores result?.head?.status in target.http_status, and on a
#       non-matching result sets target.reason = "BadStatus" before calling
#       target.service.fail(target) (previously reason was assigned after fail()).
#     - A later hunk sets _target.reason = 'ConnectionRefused' before fail().
#   pjs/common/resource-usage.js (new file)
#     - Reads /proc/self/stat, /proc/uptime, /proc/meminfo and /proc/self/statm,
#       runs `hostname`, and on a task scheduled every
#       config.Configs.ResourceUsage.ScrapeInterval seconds sets two
#       fgwResourceUsage gauges whose last label is 'cpu' and 'mem'.
#   pjs/config.json
#     - Replaces "EnableDebug": true with "ResourceUsage": { "ScrapeInterval": 5 },
#       reflows a "Value" array, and removes the "Version": "0" entry.
#   pjs/lib/metrics.js
#     - Adds the fgwResourceUsage gauge (labels uuid, name, codeBase, host, type)
#       and extends fgwUpstreamStatus labels to
#       name, ip, port, status, type, http_status.
#   pjs/main.js
#     - Adds a .branch(...) that uses common/resource-usage.js when
#       config?.Configs?.ResourceUsage?.ScrapeInterval > 0.
#
# NOTE(review): points to confirm before merging
#   1. The 'ok' call passes `target.reason = ''` (an assignment expression) where
#      the 'fail' call passes `target.reason || ''`. Presumably this is meant to
#      clear a stale reason; confirm it is not a typo for `||`.
#   2. The fifth fgwUpstreamStatus label is declared as 'type', but both call
#      sites pass target.reason in that position; confirm the intended name.
#   3. ok() calls increase() on the series labelled 'ok' and fail() calls
#      decrease() on the series labelled 'fail' (with reason / http_status
#      labels), whereas the old code used set(0) with only name/ip/port.
#      Confirm the resulting gauge values are what dashboards expect.
#   4. 'ConnectionRefused' is assigned for every e.error other than
#      "ReadTimeout" / "IdleTimeout"; confirm other error kinds should share
#      that reason.
#   5. The last config.json hunk removes "Version": "0" directly after a `},`
#      context line; confirm the resulting JSON has no trailing comma.
#   6. memUsage multiplies the statm value by 4 and cpuUsage divides items[21]
#      by 100. Presumably these are a 4 KiB page size and 100 clock ticks per
#      second; confirm for the target hosts.
#   7. The hunk text below appears whitespace-collapsed (original line breaks
#      and indentation are not visible here).
diff --git a/pjs/common/health-check.js b/pjs/common/health-check.js index 5976629..0c630c4 100644 --- a/pjs/common/health-check.js +++ b/pjs/common/health-check.js @@ -44,7 +44,10 @@ metrics.fgwUpstreamStatus.withLabels( name, target.ip, - target.port + target.port, + 'ok', + target.reason = '', + target.http_status || '' ).increase() ), @@ -67,8 +70,11 @@ metrics.fgwUpstreamStatus.withLabels( name, target.ip, - target.port - ).set(0) + target.port, + 'fail', + target.reason || '', + target.http_status || '' + ).decrease() ), available: target => ( @@ -108,11 +114,12 @@ check: target => ( new http.Agent(target.target).request('GET', uri).then( result => ( + target.http_status = result?.head?.status, target.service.match(result) ? ( target.service.ok(target) ) : ( - target.service.fail(target), - target.reason = "status " + result?.head?.status + target.reason = "BadStatus", + target.service.fail(target) ), {} ) @@ -221,6 +228,7 @@ (!e.error || e.error === "ReadTimeout" || e.error === "IdleTimeout") ? ( _target.service.ok(_target) ) : ( + _target.reason = 'ConnectionRefused', _target.service.fail(_target) ), _resolve(), diff --git a/pjs/common/resource-usage.js b/pjs/common/resource-usage.js new file mode 100644 index 0000000..9c468a4 --- /dev/null +++ b/pjs/common/resource-usage.js @@ -0,0 +1,42 @@ +( + ( + { config } = pipy.solve('config.js'), + { metrics } = pipy.solve('lib/metrics.js'), + cpuUsage = ( + ( + items = os.readFile('/proc/self/stat')?.toString?.()?.split?.(" "), + su = os.readFile('/proc/uptime')?.toString?.()?.split?.(".")?.[0], + dr, + ur, + ) => ( + items && su && ( + dr = su - items[21] / 100, + ur = +items[13] + +items[14], + (ur / (dr < 0 ? 
1 : dr)).toFixed(2) + ) + ) + ), + memSize = os.readFile('/proc/meminfo')?.toString?.()?.split?.('\n')?.filter?.(s => s.startsWith('MemTotal'))?.[0]?.split?.(' ')?.filter?.(e => e)?.[1], + memUsage = ( + ( + ram = os.readFile('/proc/self/statm')?.toString?.()?.split?.(' ')?.[1], + ) => ( + (+ram * 4 * 100 / memSize).toFixed(2) + ) + ), + hostname = pipy.exec('hostname')?.toString?.()?.replaceAll?.('\n', ''), + cpuUsageMetric = metrics.fgwResourceUsage.withLabels(pipy.uuid || '', pipy.name || '', pipy.source || '', hostname, 'cpu'), + memUsageMetric = metrics.fgwResourceUsage.withLabels(pipy.uuid || '', pipy.name || '', pipy.source || '', hostname, 'mem'), + ) => pipy() + +.pipeline() +.task(config.Configs.ResourceUsage.ScrapeInterval + 's') +.onStart( + () => ( + cpuUsageMetric.set(+cpuUsage()), + memUsageMetric.set(+memUsage()), + new StreamEnd + ) +) + +)() \ No newline at end of file diff --git a/pjs/config.json b/pjs/config.json index 77de81b..8271ffb 100644 --- a/pjs/config.json +++ b/pjs/config.json @@ -1,6 +1,8 @@ { "Configs": { - "EnableDebug": true + "ResourceUsage": { + "ScrapeInterval": 5 + } }, "Listeners": [ { @@ -77,7 +79,9 @@ "Matches": [ { "Type": "status", - "Value": [ 200 ] + "Value": [ + 200 + ] } ] }, @@ -152,6 +156,5 @@ "tcp/forward.js" ] }, - "Version": "0" } diff --git a/pjs/lib/metrics.js b/pjs/lib/metrics.js index c08bbec..90b0905 100644 --- a/pjs/lib/metrics.js +++ b/pjs/lib/metrics.js @@ -9,6 +9,14 @@ 'k8sCluster' ]), + fgwResourceUsage = new stats.Gauge('fgw_resource_usage', [ + 'uuid', + 'name', + 'codeBase', + 'host', + 'type' + ]), + fgwHttpStatus = new stats.Counter('fgw_http_status', [ 'service', 'code', 'route', 'matched_uri', 'matched_host', 'consumer', 'node' ]), @@ -24,7 +32,7 @@ ]), fgwUpstreamStatus = new stats.Gauge('fgw_upstream_status', [ - 'name', 'ip', 'port' + 'name', 'ip', 'port', 'status', 'type', 'http_status' ]), fgwHttpLatency = new stats.Histogram('fgw_http_latency', [ @@ -55,6 +63,7 @@ metrics = { fgwMetaInfo, // 
main.js + fgwResourceUsage, // resource-usage.js fgwHttpRequestsTotal, // codec.js fgwHttpCurrentConnections, // codec.js fgwUpstreamStatus, // health-check.js diff --git a/pjs/main.js b/pjs/main.js index 75c6627..1fb7a07 100644 --- a/pjs/main.js +++ b/pjs/main.js @@ -32,6 +32,13 @@ metrics.fgwMetaInfo.withLabels(pipy.uuid || '', pipy.name || '', pipy.source || '', os.env.PIPY_K8S_CLUSTER || '').increase() ) ) +.branch( + (config?.Configs?.ResourceUsage?.ScrapeInterval > 0), ( + $=>$ + .task() + .use('common/resource-usage.js') + ) +) .repeat( (config.Listeners || []),