resource_tracker/metrics/gpu.rs
1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3
4/// Per-GPU metrics sourced from all-smi (NVIDIA NVML / AMD / TPU / …).
5/// Populated only when a supported GPU library is detected at runtime;
6/// an empty Vec means no GPUs are present or no driver is installed.
7#[derive(Debug, Clone, Serialize, Deserialize)]
8pub struct GpuMetrics {
9 // ------------------------------------------------------------------
10 // Identity - reported by the driver, stable for the lifetime of the run.
11 // ------------------------------------------------------------------
12 /// Vendor-assigned device UUID (stable across reboots for physical GPUs).
13 pub uuid: String,
14 /// Human-readable device name, e.g. "NVIDIA GeForce RTX 4090".
15 pub name: String,
16 /// Device class reported by all-smi: "GPU", "NPU", "TPU", etc.
17 /// Equivalent to the "kind" of accelerator.
18 pub device_type: String,
19 /// Host-level device identifier (slot, bus address, or platform index).
20 pub host_id: String,
21 /// Additional platform-specific identity fields keyed by the driver.
22 /// For NVIDIA these typically include PCI device/vendor IDs and the
23 /// driver version; for AMD, the ASIC name and PCIe topology; for TPU,
24 /// the chip revision. Keys and values vary by vendor.
25 pub detail: HashMap<String, String>,
26
27 // ------------------------------------------------------------------
28 // Dynamic metrics - polled each interval.
29 // ------------------------------------------------------------------
30 /// Core utilisation (0.0–100.0).
31 pub utilization_pct: f64,
32 /// Total VRAM in bytes.
33 pub vram_total_bytes: u64,
34 /// Used VRAM in bytes.
35 pub vram_used_bytes: u64,
36 /// Fraction of VRAM in use (0.0–100.0).
37 pub vram_used_pct: f64,
38 /// Die temperature in degrees Celsius.
39 pub temperature_celsius: u32,
40 /// Power draw in watts.
41 pub power_watts: f64,
42 /// Core clock frequency in MHz.
43 pub frequency_mhz: u32,
44 /// Number of shader/compute cores, if reported by the driver.
45 pub core_count: Option<u32>,
46}