diff --git a/logfire/_internal/integrations/system_metrics.py b/logfire/_internal/integrations/system_metrics.py index 4eb5522f1..9a4d76ec6 100644 --- a/logfire/_internal/integrations/system_metrics.py +++ b/logfire/_internal/integrations/system_metrics.py @@ -31,6 +31,7 @@ 'system.cpu.simple_utilization', 'system.cpu.time', 'system.cpu.utilization', + 'system.cpu.load_average.1m', 'system.memory.usage', 'system.memory.utilization', 'system.swap.usage', @@ -43,6 +44,7 @@ 'system.network.errors', 'system.network.io', 'system.network.connections', + 'system.processes.count', 'system.thread_count', 'process.open_file_descriptor.count', 'process.context_switches', @@ -62,6 +64,7 @@ 'system.cpu.simple_utilization', 'system.cpu.time', 'system.cpu.utilization', + 'system.cpu.load_average.1m', 'system.memory.usage', 'system.memory.utilization', 'system.swap.usage', @@ -74,6 +77,7 @@ 'system.network.errors', 'system.network.io', 'system.network.connections', + 'system.processes.count', 'system.thread_count', 'process.open_file_descriptor.count', 'process.context_switches', @@ -109,6 +113,11 @@ 'process.cpu.core_utilization': None, 'system.cpu.time': CPU_FIELDS, 'system.cpu.utilization': CPU_FIELDS, + # The Hosts page reads these — upstream `_DEFAULT_CONFIG` doesn't include + # them and they aren't expensive (one number per scrape each), so add them + # so `base='full'` covers the page without a follow-up `config={…}` knob. + 'system.cpu.load_average.1m': None, + 'system.processes.count': None, # For usage, knowing the total amount of bytes available might be handy. 'system.memory.usage': MEMORY_FIELDS + ['total'], # For utilization, the total is always just 1 (100%), so it's not included. @@ -173,6 +182,14 @@ def instrument_system_metrics(logfire_instance: Logfire, config: Config | None = measure_process_cpu_utilization(logfire_instance) del config['process.cpu.utilization'] + if 'system.cpu.load_average.1m' in config: + measure_system_cpu_load_average_1m(logfire_instance) + del config['system.cpu.load_average.1m'] + + if 'system.processes.count' in config: + measure_system_processes_count(logfire_instance) + del config['system.processes.count'] + instrumentor = SystemMetricsInstrumentor(config=config) # pyright: ignore[reportArgumentType] instrumentor.instrument(meter_provider=logfire_instance.config.get_meter_provider()) @@ -248,3 +265,41 @@ def callback(_options: CallbackOptions) -> Iterable[Observation]: description='Runtime CPU utilization, not divided by the number of available cores.', unit='core', ) + + +def measure_system_cpu_load_average_1m(logfire_instance: Logfire): + """1-minute system load average (run-queue length, smoothed). + + Upstream `SystemMetricsInstrumentor` doesn't include this metric; we add it + so `base='full'` covers the Hosts page's `Load 1m` column. On Windows, + `psutil.getloadavg()` is an emulated polling shim — first call returns 0 + and subsequent values update every ~5s. + """ + + def callback(_options: CallbackOptions) -> Iterable[Observation]: + yield Observation(psutil.getloadavg()[0]) + + logfire_instance.metric_gauge_callback( + 'system.cpu.load_average.1m', + [callback], + description='Average system load over the last minute.', + unit='{run_queue_item}', + ) + + +def measure_system_processes_count(logfire_instance: Logfire): + """Total number of processes on the system. + + Upstream `SystemMetricsInstrumentor` doesn't include this metric; we add + it so `base='full'` covers the Hosts page's `Procs` column. + """ + + def callback(_options: CallbackOptions) -> Iterable[Observation]: + yield Observation(len(psutil.pids())) + + logfire_instance.metric_gauge_callback( + 'system.processes.count', + [callback], + description='Total number of processes on the system.', + unit='{process}', + ) diff --git a/tests/otel_integrations/test_system_metrics.py b/tests/otel_integrations/test_system_metrics.py index ea30702b2..a25876ea2 100644 --- a/tests/otel_integrations/test_system_metrics.py +++ b/tests/otel_integrations/test_system_metrics.py @@ -67,6 +67,7 @@ def test_all_system_metrics_collection(metrics_reader: InMemoryMetricReader) -> 'process.open_file_descriptor.count', 'process.runtime.cpython.gc_count', 'process.thread.count', + 'system.cpu.load_average.1m', 'system.cpu.simple_utilization', 'system.cpu.time', 'system.cpu.utilization', @@ -79,6 +80,7 @@ def test_all_system_metrics_collection(metrics_reader: InMemoryMetricReader) -> 'system.network.errors', 'system.network.io', 'system.network.packets', + 'system.processes.count', 'system.swap.usage', 'system.swap.utilization', 'system.thread_count', @@ -92,6 +94,18 @@ def test_measure_process_runtime_cpu_utilization(metrics_reader: InMemoryMetricR assert get_collected_metric_names(metrics_reader) == ['process.runtime.cpython.cpu.utilization'] +def test_system_cpu_load_average_1m(metrics_reader: InMemoryMetricReader) -> None: + """Load average isn't in upstream `SystemMetricsInstrumentor` — Logfire emits it.""" + logfire.instrument_system_metrics({'system.cpu.load_average.1m': None}, base=None) + assert get_collected_metric_names(metrics_reader) == ['system.cpu.load_average.1m'] + + +def test_system_processes_count(metrics_reader: InMemoryMetricReader) -> None: + """Process count isn't in upstream `SystemMetricsInstrumentor` — Logfire emits it.""" + logfire.instrument_system_metrics({'system.processes.count': None}, base=None) + assert get_collected_metric_names(metrics_reader) == ['system.processes.count'] + + def test_custom_system_metrics_collection(metrics_reader: InMemoryMetricReader) -> None: logfire.instrument_system_metrics( { @@ -186,6 +200,8 @@ def test_full_base(): 'cpython.gc.collected_objects': None, 'cpython.gc.collections': None, 'cpython.gc.uncollectable_objects': None, + 'system.cpu.load_average.1m': None, + 'system.processes.count': None, }, 'Docs and the MetricName type need to be updated if this test fails'