From 3fa931e0e1a15fd73363c56388d5fac07aecb05a Mon Sep 17 00:00:00 2001 From: Iain Sproat <68657+iainsproat@users.noreply.github.com> Date: Wed, 8 Jan 2025 18:57:54 +0000 Subject: [PATCH 1/5] feat(database monitor): metric for connection duration of 20 slowest --- .../metrics/currentConnections.ts | 44 +++++++++++++++++++ .../src/observability/prometheusMetrics.ts | 2 + 2 files changed, 46 insertions(+) create mode 100644 packages/monitor-deployment/src/observability/metrics/currentConnections.ts diff --git a/packages/monitor-deployment/src/observability/metrics/currentConnections.ts b/packages/monitor-deployment/src/observability/metrics/currentConnections.ts new file mode 100644 index 0000000000..c5c6c7e9fd --- /dev/null +++ b/packages/monitor-deployment/src/observability/metrics/currentConnections.ts @@ -0,0 +1,44 @@ +import prometheusClient from 'prom-client' +import { join } from 'lodash-es' +import type { MetricInitializer } from '@/observability/types.js' + +export const init: MetricInitializer = (config) => { + const { labelNames, namePrefix, logger } = config + const currentConnections = new prometheusClient.Gauge({ + name: join([namePrefix, 'db_connections'], '_'), + help: 'Age of database connections, by sql query', + labelNames: ['query', 'region', ...labelNames] + }) + return async (params) => { + const { dbClients, labels } = params + await Promise.all( + dbClients.map(async ({ client, regionKey }) => { + try { + const currentConnectionResults = await client.raw<{ + rows: [{ datname: string; state: string; query: string; interval: string }] + }>( + ` + SELECT datname, state, query, clock_timestamp() - query_start AS interval + FROM pg_stat_activity + WHERE state <> 'idle' + AND query NOT LIKE '% FROM pg_stat_activity %' + ORDER BY interval DESC + LIMIT 20; + ` + ) + for (const row of currentConnectionResults.rows) { + currentConnections.set( + { ...labels, query: row.query, region: regionKey }, + parseInt(row.interval) + ) + } + } catch (err) { + logger.warn( + { err, region: regionKey }, + "Failed to collect current connections from region '{region}'." + ) + } + }) + ) + } +} diff --git a/packages/monitor-deployment/src/observability/prometheusMetrics.ts b/packages/monitor-deployment/src/observability/prometheusMetrics.ts index 3ea9e20e93..608e9f0c18 100644 --- a/packages/monitor-deployment/src/observability/prometheusMetrics.ts +++ b/packages/monitor-deployment/src/observability/prometheusMetrics.ts @@ -5,6 +5,7 @@ import { join } from 'lodash-es' import { Counter, Histogram, Registry } from 'prom-client' import prometheusClient from 'prom-client' import { init as commits } from '@/observability/metrics/commits.js' +import { init as currentConnections } from '@/observability/metrics/currentConnections.js' import { init as dbMaxLogicalReplicationWorkers } from '@/observability/metrics/dbMaxLogicalReplicationWorkers.js' import { init as dbMaxReplicationSlots } from '@/observability/metrics/dbMaxReplicationSlots.js' import { init as dbMaxSyncWorkersPerSubscription } from '@/observability/metrics/dbMaxSyncWorkersPerSubscription.js' @@ -60,6 +61,7 @@ function initMonitoringMetrics(params: { const metricsToInitialize = [ commits, + currentConnections, dbMaxLogicalReplicationWorkers, dbMaxReplicationSlots, dbMaxSyncWorkersPerSubscription, From d635cb203cb8ce08137f5dbc066590a94a78224d Mon Sep 17 00:00:00 2001 From: Iain Sproat <68657+iainsproat@users.noreply.github.com> Date: Wed, 8 Jan 2025 20:01:57 +0000 Subject: [PATCH 2/5] improve parsing of postgres interval --- packages/monitor-deployment/package.json | 1 + .../metrics/currentConnections.ts | 21 +++++++++++++++++-- yarn.lock | 8 +++++++ 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/packages/monitor-deployment/package.json b/packages/monitor-deployment/package.json index 2389bae03a..acc8751b9f 100644 --- a/packages/monitor-deployment/package.json +++ b/packages/monitor-deployment/package.json @@ -42,6 +42,7 @@ "pino": "^8.7.0", "pino-http": "^8.2.1", "pino-pretty": "^9.1.1", + "postgres-interval": "^4.0.2", "prom-client": "^14.0.1", "znv": "^0.4.0", "zod": "^3.24.1" diff --git a/packages/monitor-deployment/src/observability/metrics/currentConnections.ts b/packages/monitor-deployment/src/observability/metrics/currentConnections.ts index c5c6c7e9fd..7ce356aa38 100644 --- a/packages/monitor-deployment/src/observability/metrics/currentConnections.ts +++ b/packages/monitor-deployment/src/observability/metrics/currentConnections.ts @@ -1,6 +1,7 @@ import prometheusClient from 'prom-client' import { join } from 'lodash-es' import type { MetricInitializer } from '@/observability/types.js' +import Interval from 'postgres-interval' export const init: MetricInitializer = (config) => { const { labelNames, namePrefix, logger } = config @@ -22,14 +23,18 @@ export const init: MetricInitializer = (config) => { FROM pg_stat_activity WHERE state <> 'idle' AND query NOT LIKE '% FROM pg_stat_activity %' + AND query NOT LIKE 'START_REPLICATION SLOT %' + AND query NOT LIKE '' ORDER BY interval DESC - LIMIT 20; + LIMIT 100; ` ) for (const row of currentConnectionResults.rows) { + const interval = Interval(row.interval) + currentConnections.set( { ...labels, query: row.query, region: regionKey }, - parseInt(row.interval) + intervalToMilliseconds(interval) ) } } catch (err) { @@ -42,3 +47,15 @@ export const init: MetricInitializer = (config) => { ) } } + +const intervalToMilliseconds = (interval: Interval.IPostgresInterval) => { + return ( + interval.years * 31536000000 + //assumes 365 days exactly + interval.months * 2592000000 + //assumes 30 days + interval.days * 86400000 + + interval.hours * 3600000 + + interval.minutes * 60000 + + interval.seconds * 1000 + + interval.milliseconds + ) +} diff --git a/yarn.lock b/yarn.lock index 002fa82344..d812867a6e 100644 --- a/yarn.lock +++ b/yarn.lock @@ -17075,6 +17075,7 @@ __metadata: pino: "npm:^8.7.0" pino-http: "npm:^8.2.1" pino-pretty: "npm:^9.1.1" + postgres-interval: "npm:^4.0.2" prettier: "npm:^2.5.1" prom-client: "npm:^14.0.1" rimraf: "npm:^5.0.7" @@ -44575,6 +44576,13 @@ __metadata: languageName: node linkType: hard +"postgres-interval@npm:^4.0.2": + version: 4.0.2 + resolution: "postgres-interval@npm:4.0.2" + checksum: 10/8eb7487d24c2752c5bd0798e0039e336ba5814f19a1e60926f9e4ecbbaea85387aa1871175085e90bc2a3bc354c8bfe0ef0b7a162817cff7d4b4714b83f9f1bd + languageName: node + linkType: hard + "prebuild-install@npm:^7.1.1": version: 7.1.1 resolution: "prebuild-install@npm:7.1.1" From a929993ae6fdbdd5b4c49c4d5d1c0056b07baf09 Mon Sep 17 00:00:00 2001 From: Iain Sproat <68657+iainsproat@users.noreply.github.com> Date: Thu, 9 Jan 2025 11:41:15 +0000 Subject: [PATCH 3/5] Use postgres magic to calculate interval in milliseconds instead of nodejs --- packages/monitor-deployment/package.json | 1 - .../metrics/currentConnections.ts | 23 ++++--------------- yarn.lock | 8 ------- 3 files changed, 4 insertions(+), 28 deletions(-) diff --git a/packages/monitor-deployment/package.json b/packages/monitor-deployment/package.json index acc8751b9f..2389bae03a 100644 --- a/packages/monitor-deployment/package.json +++ b/packages/monitor-deployment/package.json @@ -42,7 +42,6 @@ "pino": "^8.7.0", "pino-http": "^8.2.1", "pino-pretty": "^9.1.1", - "postgres-interval": "^4.0.2", "prom-client": "^14.0.1", "znv": "^0.4.0", "zod": "^3.24.1" diff --git a/packages/monitor-deployment/src/observability/metrics/currentConnections.ts b/packages/monitor-deployment/src/observability/metrics/currentConnections.ts index 7ce356aa38..dcb3e7317a 100644 --- a/packages/monitor-deployment/src/observability/metrics/currentConnections.ts +++ b/packages/monitor-deployment/src/observability/metrics/currentConnections.ts @@ -1,13 +1,12 @@ import prometheusClient from 'prom-client' import { join } from 'lodash-es' import type { MetricInitializer } from '@/observability/types.js' -import Interval from 'postgres-interval' export const init: MetricInitializer = (config) => { const { labelNames, namePrefix, logger } = config const currentConnections = new prometheusClient.Gauge({ name: join([namePrefix, 'db_connections'], '_'), - help: 'Age of database connections, by sql query', + help: 'Age of database connections, by sql query, in milliseconds', labelNames: ['query', 'region', ...labelNames] }) return async (params) => { @@ -16,10 +15,10 @@ export const init: MetricInitializer = (config) => { dbClients.map(async ({ client, regionKey }) => { try { const currentConnectionResults = await client.raw<{ - rows: [{ datname: string; state: string; query: string; interval: string }] + rows: [{ datname: string; state: string; query: string; interval: number }] }>( ` - SELECT datname, state, query, clock_timestamp() - query_start AS interval + SELECT datname, state, query, ROUND((EXTRACT(EPOCH FROM clock_timestamp()) - EXTRACT(EPOCH FROM query_start)) * 1000) AS interval FROM pg_stat_activity WHERE state <> 'idle' AND query NOT LIKE '% FROM pg_stat_activity %' @@ -30,11 +29,9 @@ export const init: MetricInitializer = (config) => { ` ) for (const row of currentConnectionResults.rows) { - const interval = Interval(row.interval) - currentConnections.set( { ...labels, query: row.query, region: regionKey }, - intervalToMilliseconds(interval) + row.interval ) } } catch (err) { @@ -47,15 +44,3 @@ export const init: MetricInitializer = (config) => { ) } } - -const intervalToMilliseconds = (interval: Interval.IPostgresInterval) => { - return ( - interval.years * 31536000000 + //assumes 365 days exactly - interval.months * 2592000000 + //assumes 30 days - interval.days * 86400000 + - interval.hours * 3600000 + - interval.minutes * 60000 + - interval.seconds * 1000 + - interval.milliseconds - ) -} diff --git a/yarn.lock b/yarn.lock index d812867a6e..002fa82344 100644 --- a/yarn.lock +++ b/yarn.lock @@ -17075,7 +17075,6 @@ __metadata: pino: "npm:^8.7.0" pino-http: "npm:^8.2.1" pino-pretty: "npm:^9.1.1" - postgres-interval: "npm:^4.0.2" prettier: "npm:^2.5.1" prom-client: "npm:^14.0.1" rimraf: "npm:^5.0.7" @@ -44576,13 +44575,6 @@ __metadata: languageName: node linkType: hard -"postgres-interval@npm:^4.0.2": - version: 4.0.2 - resolution: "postgres-interval@npm:4.0.2" - checksum: 10/8eb7487d24c2752c5bd0798e0039e336ba5814f19a1e60926f9e4ecbbaea85387aa1871175085e90bc2a3bc354c8bfe0ef0b7a162817cff7d4b4714b83f9f1bd - languageName: node - linkType: hard - "prebuild-install@npm:^7.1.1": version: 7.1.1 resolution: "prebuild-install@npm:7.1.1" From 1b48c5fbeaebfb2d0b5dcd60d38cf926e9a04524 Mon Sep 17 00:00:00 2001 From: Iain Sproat <68657+iainsproat@users.noreply.github.com> Date: Thu, 9 Jan 2025 14:00:23 +0000 Subject: [PATCH 4/5] treat numeric as string and parse int --- .../src/observability/metrics/currentConnections.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/monitor-deployment/src/observability/metrics/currentConnections.ts b/packages/monitor-deployment/src/observability/metrics/currentConnections.ts index dcb3e7317a..d7c5ff3e74 100644 --- a/packages/monitor-deployment/src/observability/metrics/currentConnections.ts +++ b/packages/monitor-deployment/src/observability/metrics/currentConnections.ts @@ -15,7 +15,7 @@ export const init: MetricInitializer = (config) => { dbClients.map(async ({ client, regionKey }) => { try { const currentConnectionResults = await client.raw<{ - rows: [{ datname: string; state: string; query: string; interval: number }] + rows: [{ datname: string; state: string; query: string; interval: string }] }>( ` SELECT datname, state, query, ROUND((EXTRACT(EPOCH FROM clock_timestamp()) - EXTRACT(EPOCH FROM query_start)) * 1000) AS interval @@ -31,7 +31,7 @@ export const init: MetricInitializer = (config) => { for (const row of currentConnectionResults.rows) { currentConnections.set( { ...labels, query: row.query, region: regionKey }, - row.interval + parseInt(row.interval) ) } } catch (err) { From b8c96455d214b7c6eaf70458af54b697e775d57d Mon Sep 17 00:00:00 2001 From: Iain Sproat <68657+iainsproat@users.noreply.github.com> Date: Thu, 9 Jan 2025 15:04:10 +0000 Subject: [PATCH 5/5] Remove all previous labels of metric prior to updating --- .../src/observability/metrics/currentConnections.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/monitor-deployment/src/observability/metrics/currentConnections.ts b/packages/monitor-deployment/src/observability/metrics/currentConnections.ts index d7c5ff3e74..342c51abea 100644 --- a/packages/monitor-deployment/src/observability/metrics/currentConnections.ts +++ b/packages/monitor-deployment/src/observability/metrics/currentConnections.ts @@ -28,6 +28,7 @@ export const init: MetricInitializer = (config) => { LIMIT 100; ` ) + currentConnections.reset() for (const row of currentConnectionResults.rows) { currentConnections.set( { ...labels, query: row.query, region: regionKey },