Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 106 additions & 0 deletions scripts/smoke-stats.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#!/usr/bin/env node
// Smoke test for the Overview headline stats (marketing backlog P0-03:
// dashboard rendering "0 nodes / 0 VMs" on transient API failures).
//
// The deployed dashboard is a static export rendered client-side with React
// Query, so its HTML carries no stat values — verifying the *rendered* DOM
// would require headless Chrome, which we deliberately avoid here (no new
// deps). Instead this script exercises the exact data path the client uses
// (`getOverviewStats` in src/api/client.ts): the cheap /api/v1/stats call
// for the headline totals, plus the paginated /api/v1/vms and /api/v1/nodes
// fan-outs for the derived breakdowns. It asserts:
//
// 1. /api/v1/stats reports nonzero total_nodes / total_vms / healthy_nodes
// 2. The node fan-out count agrees with stats.total_nodes within 5%
// 3. The 7d-retention VM count (the Overview "Total VMs" headline,
// Decision #110) is nonzero
//
// Usage: node scripts/smoke-stats.mjs [api-base-url]
// default api-base-url: https://rust-scheduler.aleph.im

// Scheduler API origin: the first CLI argument when given, otherwise the
// default URL below.
const BASE_URL = process.argv[2] ?? "https://rust-scheduler.aleph.im";
// Page size requested on every paginated call made by fetchAllPages.
const MAX_PAGE_SIZE = 200; // mirrors MAX_PAGE_SIZE in src/api/client.ts
// Seven days expressed in milliseconds (86_400_000 ms per day).
const RETENTION_MS = 7 * 86_400_000; // DEFAULT_RETENTION ("7d") in src/lib/filters.ts
// Maximum relative difference accepted by withinTolerance (0.05 = 5%).
const TOLERANCE = 0.05;

// Running tally of failed checks; the end of the script turns a nonzero
// tally into a nonzero exit status.
let failures = 0;

// Prints one "[ok ]" / "[FAIL]" result line for a single assertion and bumps
// the failure tally when `ok` is falsy.
//   ok     - truthy when the assertion held
//   label  - short description of what was asserted
//   detail - optional extra context, appended after an em dash when truthy
function check(ok, label, detail) {
  const suffix = detail ? ` — ${detail}` : "";
  const tag = ok ? "ok " : "FAIL";
  console.log(`[${tag}] ${label}${suffix}`);
  if (ok) return;
  failures++;
}

// GETs `${BASE_URL}${path}` and resolves with the parsed JSON body.
// Throws an Error naming the HTTP status and the path when the response is
// not ok, so callers never parse an error payload as data.
async function fetchJson(path) {
  const url = `${BASE_URL}${path}`;
  const response = await fetch(url);
  if (response.ok) {
    return response.json();
  }
  throw new Error(
    `API error: ${response.status} ${response.statusText} for ${path}`,
  );
}

// Mirrors fetchAllPages in src/api/client.ts.
// Fetches page 1 to learn `pagination.total_pages`, then requests every
// remaining page concurrently and returns all `items` in page order.
async function fetchAllPages(path) {
  const joiner = path.includes("?") ? "&" : "?";
  const pageUrl = (pageNumber) =>
    `${path}${joiner}page=${pageNumber}&page_size=${MAX_PAGE_SIZE}`;

  const head = await fetchJson(pageUrl(1));
  const totalPages = head.pagination.total_pages;
  if (totalPages <= 1) return head.items;

  // Pages 2..totalPages are independent, so start them all before awaiting.
  const pending = [];
  for (let page = 2; page <= totalPages; page++) {
    pending.push(fetchJson(pageUrl(page)));
  }
  const rest = await Promise.all(pending);

  return [head, ...rest].flatMap((page) => page.items);
}

// Reports whether `a` deviates from the reference value `b` by no more than
// a relative tolerance.
//   a         - observed value
//   b         - reference value the deviation is measured against
//   tolerance - maximum allowed |a - b| / |b|; defaults to TOLERANCE
// Returns true when the relative deviation is within tolerance. A zero
// reference has no meaningful relative error, so it only matches a zero
// observation.
function withinTolerance(a, b, tolerance = TOLERANCE) {
  if (b === 0) return a === 0;
  // Divide by |b|: a signed denominator would make the ratio negative for a
  // negative reference, and a negative ratio passes any tolerance.
  return Math.abs(a - b) / Math.abs(b) <= tolerance;
}

// Mirrors lastActivityMs + applyRetentionWindow in src/lib/filters.ts
// (wire-format field names, since we skip the client's transform step).
// Returns the latest of the VM's three timestamps as epoch milliseconds.
// A missing (falsy) timestamp counts as -Infinity so it can never win; if
// all three are missing the result is -Infinity.
function lastActivityMs(vm) {
  const candidates = [vm.last_observed_at, vm.updated_at, vm.allocated_at];
  const millis = candidates.map((stamp) => {
    if (!stamp) return Number.NEGATIVE_INFINITY;
    return new Date(stamp).getTime();
  });
  return Math.max(...millis);
}

// 1. Headline totals straight from the single /stats call.
const stats = await fetchJson("/api/v1/stats");
const headlineChecks = [
  ["stats.total_nodes nonzero", stats.total_nodes],
  ["stats.total_vms nonzero", stats.total_vms],
  ["stats.healthy_nodes nonzero", stats.healthy_nodes],
];
for (const [label, count] of headlineChecks) {
  check(count > 0, label, String(count));
}

// Both paginated fan-outs run concurrently; a failure in either rejects here
// and aborts the script.
const [nodeRows, vmRows] = await Promise.all(
  ["/api/v1/nodes", "/api/v1/vms"].map((path) => fetchAllPages(path)),
);

// 2. The number of node rows actually fetched must agree with /stats.
const fetchedNodes = nodeRows.length;
check(
  withinTolerance(fetchedNodes, stats.total_nodes),
  "node fan-out agrees with stats.total_nodes (±5%)",
  `${fetchedNodes} fetched vs ${stats.total_nodes} reported`,
);

// 3. Count VMs whose latest activity falls inside the retention window.
const oldestAllowed = Date.now() - RETENTION_MS;
let recentCount = 0;
for (const vm of vmRows) {
  if (lastActivityMs(vm) >= oldestAllowed) recentCount += 1;
}
check(
  recentCount > 0,
  'Overview "Total VMs" headline (7d retention) nonzero',
  `${recentCount} of ${vmRows.length} fetched VMs`,
);

// Summarise: any failed check makes the process exit with status 1.
if (failures > 0) {
  console.error(`\n${failures} check(s) failed against ${BASE_URL}`);
  process.exit(1);
} else {
  console.log(`\nAll checks passed against ${BASE_URL}`);
}
19 changes: 14 additions & 5 deletions src/api/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -257,13 +257,17 @@ export async function getVM(hash: string): Promise<VmDetail> {
}

export async function getOverviewStats(): Promise<OverviewStats> {
// Headline totals come from the single cheap /stats call. The VM/node
// page fan-outs (dozens of requests) only feed derived breakdowns, so a
// transient failure there degrades those to empty (null → []) instead
// of rejecting the whole query and rendering the headline as "0 / 0".
const [stats, rawVms, rawNodes] = await Promise.all([
fetchApi<ApiStats>("/api/v1/stats"),
fetchAllPages<ApiVmRow>("/api/v1/vms"),
fetchAllPages<ApiNodeRow>("/api/v1/nodes"),
fetchAllPages<ApiVmRow>("/api/v1/vms").catch(() => null),
fetchAllPages<ApiNodeRow>("/api/v1/nodes").catch(() => null),
]);
const nodes = rawNodes.map(transformNode);
const vms = rawVms.map(transformVm);
const nodes = (rawNodes ?? []).map(transformNode);
const vms = (rawVms ?? []).map(transformVm);
return {
totalNodes: stats.total_nodes,
healthyNodes: stats.healthy_nodes,
Expand All @@ -274,7 +278,12 @@ export async function getOverviewStats(): Promise<OverviewStats> {
.length,
removedNodes: nodes.filter((n) => n.status === "removed")
.length,
totalVMs: applyRetentionWindow(vms, DEFAULT_RETENTION, Date.now()).length,
// Retention-window count when the VM list is available (Decision
// #110); all-time total from /stats when the fan-out failed, so the
// headline never collapses to 0 on a transient error.
totalVMs: rawVms
? applyRetentionWindow(vms, DEFAULT_RETENTION, Date.now()).length
: stats.total_vms,
dispatchedVMs: vms.filter((v) => v.status === "dispatched")
.length,
missingVMs: vms.filter((v) => v.status === "missing").length,
Expand Down
20 changes: 18 additions & 2 deletions src/components/stats-bar.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ type StatProps = {
total: number | undefined;
subtitle: string;
isLoading: boolean;
isError: boolean;
color?: string | undefined;
tint?: string | undefined;
icon?: React.ReactNode;
Expand Down Expand Up @@ -89,11 +90,15 @@ function StatCard({
total,
subtitle,
isLoading,
isError,
color,
tint,
icon,
}: Omit<StatProps, "href">) {
const showRing = color && !isLoading && value !== undefined && total;
// No value to show (initial fetch failed and there is no cached data):
// render an explicit "unavailable" state instead of coercing to 0.
const unavailable = !isLoading && isError && value === undefined;

return (
<div
Expand Down Expand Up @@ -125,6 +130,13 @@ function StatCard({
</div>
{isLoading ? (
<Skeleton className="mt-3 h-11 w-24" />
) : unavailable ? (
<p
aria-label="Data unavailable"
className="mt-3 font-heading text-4xl font-extrabold tracking-tight text-muted-foreground/40"
>
</p>
) : (
<p
className="mt-3 font-heading text-4xl font-extrabold tabular-nums tracking-tight"
Expand All @@ -134,7 +146,7 @@ function StatCard({
</p>
)}
<p className="mt-auto pt-2 text-xs leading-relaxed text-muted-foreground/60">
{subtitle}
{unavailable ? "Data unavailable" : subtitle}
</p>
</div>
);
Expand Down Expand Up @@ -187,7 +199,7 @@ const iconCheck = (
);

export function StatsBar() {
const { data: stats, isLoading } = useOverviewStats();
const { data: stats, isLoading, isError } = useOverviewStats();

const hasDispatched = (stats?.dispatchedVMs ?? 0) > 0;

Expand All @@ -201,6 +213,7 @@ export function StatsBar() {
total={undefined}
subtitle="Compute nodes registered with the scheduler"
isLoading={isLoading}
isError={isError}
href="/nodes"
index={0}
/>
Expand All @@ -210,6 +223,7 @@ export function StatsBar() {
total={stats?.totalNodes}
subtitle="Nodes that passed their last health check"
isLoading={isLoading}
isError={isError}
color="var(--color-success-500)"
tint="var(--color-success-500)"
icon={iconCheck}
Expand All @@ -225,6 +239,7 @@ export function StatsBar() {
total={undefined}
subtitle="VMs active in the last 7 days"
isLoading={isLoading}
isError={isError}
href="/vms"
index={2}
/>
Expand All @@ -234,6 +249,7 @@ export function StatsBar() {
total={stats?.totalVMs}
subtitle="VMs running on their correct assigned node"
isLoading={isLoading}
isError={isError}
icon={iconCheck}
href="/vms?status=dispatched"
index={3}
Expand Down
5 changes: 4 additions & 1 deletion src/hooks/use-overview-stats.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import { useQuery } from "@tanstack/react-query";
import { keepPreviousData, useQuery } from "@tanstack/react-query";
import { getOverviewStats } from "@/api/client";

/**
 * React Query hook for the Overview stats.
 *
 * Runs `getOverviewStats` under the `["overview-stats"]` query key and
 * refetches every 30 seconds (`refetchInterval: 30_000`).
 *
 * @returns The `useQuery` result for the overview stats query.
 */
export function useOverviewStats() {
return useQuery({
queryKey: ["overview-stats"],
queryFn: getOverviewStats,
refetchInterval: 30_000,
// Keep the last-good stats on screen through transient refetch
// failures instead of dropping back to `undefined` (rendered "0").
// NOTE(review): the query key here never changes. As far as I know,
// TanStack Query already retains the last successful `data` when a
// refetch of the same key fails, and `keepPreviousData` mainly matters
// when the key changes — confirm this option has the intended effect.
placeholderData: keepPreviousData,
});
}