From 7dad06f635e735daaf9b0be527a8de6ee26dc81e Mon Sep 17 00:00:00 2001 From: bredamatt <28816406+bredamatt@users.noreply.github.com> Date: Fri, 10 Jan 2025 13:06:28 +0000 Subject: [PATCH 1/2] fix(cu): add pending evals metric for alerts --- servers/cu/src/bootstrap.js | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/servers/cu/src/bootstrap.js b/servers/cu/src/bootstrap.js index 268366960..934e5a48e 100644 --- a/servers/cu/src/bootstrap.js +++ b/servers/cu/src/bootstrap.js @@ -294,6 +294,15 @@ export const createApis = async (ctx) => { labelNames: ['stream_type', 'message_type', 'process_error'] }) + const pendingEvaluationCounter = MetricsClient.counterWith({})({ + name: 'ao_process_pending_evaluations', + description: 'The total number of pending evaluations on a CU', + labelNames: ['type'] + }) + + pendingEvaluationCounter.set({ type: 'primary' }, primaryWorkQueue.size) + pendingEvaluationCounter.set({ type: 'dry-run' }, dryRunWorkQueue.size) + /** * TODO: Gas can grow to a huge number. We need to make sure this doesn't crash when that happens */ From d0935125a0776d2a2683dfe43500056901cbd166 Mon Sep 17 00:00:00 2001 From: bredamatt <28816406+bredamatt@users.noreply.github.com> Date: Thu, 16 Jan 2025 21:29:49 +0000 Subject: [PATCH 2/2] fix: increment/decrement pendingEvaluationCounter based on appropriate events --- servers/cu/src/bootstrap.js | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/servers/cu/src/bootstrap.js b/servers/cu/src/bootstrap.js index 934e5a48e..b2b057053 100644 --- a/servers/cu/src/bootstrap.js +++ b/servers/cu/src/bootstrap.js @@ -141,6 +141,12 @@ export const createApis = async (ctx) => { onCreateWorker: onCreateWorker('primary') }) const primaryWorkQueue = new PQueue({ concurrency: maxPrimaryWorkerThreads }) + primaryWorkQueue.on('add', () => { + pendingEvaluationCounter.inc(1, { type: 'primary' }) + }) + primaryWorkQueue.on('next', () => { + pendingEvaluationCounter.dec(1, { type: 'primary' }) + }) const maxDryRunWorkerTheads = Math.max( 1, @@ -152,6 +158,12 @@ export const createApis = async (ctx) => { maxQueueSize: ctx.WASM_EVALUATION_WORKERS_DRY_RUN_MAX_QUEUE }) const dryRunWorkQueue = new PQueue({ concurrency: maxDryRunWorkerTheads }) + dryRunWorkQueue.on('add', () => { + pendingEvaluationCounter.inc(1, { type: 'dry-run' }) + }) + dryRunWorkQueue.on('next', () => { + pendingEvaluationCounter.dec(1, { type: 'dry-run' }) + }) const arweave = ArweaveClient.createWalletClient() const address = ArweaveClient.addressWith({ WALLET: ctx.WALLET, arweave }) @@ -300,9 +312,6 @@ export const createApis = async (ctx) => { labelNames: ['type'] }) - pendingEvaluationCounter.set({ type: 'primary' }, primaryWorkQueue.size) - pendingEvaluationCounter.set({ type: 'dry-run' }, dryRunWorkQueue.size) - /** * TODO: Gas can grow to a huge number. We need to make sure this doesn't crash when that happens */ @@ -339,6 +348,7 @@ export const createApis = async (ctx) => { logger }), evaluationCounter, + pendingEvaluationCounter, // gasCounter, saveProcess: AoProcessClient.saveProcessWith({ db, logger }), findEvaluation: AoEvaluationClient.findEvaluationWith({ db, logger }),