// resource_tracker/metrics/gpu.rs

use serde::{Deserialize, Serialize};
use std::collections::HashMap;

4/// Per-GPU metrics sourced from all-smi (NVIDIA NVML / AMD / TPU / …).
5/// Populated only when a supported GPU library is detected at runtime;
6/// an empty Vec means no GPUs are present or no driver is installed.
7#[derive(Debug, Clone, Serialize, Deserialize)]
8pub struct GpuMetrics {
9    // ------------------------------------------------------------------
10    // Identity - reported by the driver, stable for the lifetime of the run.
11    // ------------------------------------------------------------------
12    /// Vendor-assigned device UUID (stable across reboots for physical GPUs).
13    pub uuid: String,
14    /// Human-readable device name, e.g. "NVIDIA GeForce RTX 4090".
15    pub name: String,
16    /// Device class reported by all-smi: "GPU", "NPU", "TPU", etc.
17    /// Equivalent to the "kind" of accelerator.
18    pub device_type: String,
19    /// Host-level device identifier (slot, bus address, or platform index).
20    pub host_id: String,
21    /// Additional platform-specific identity fields keyed by the driver.
22    /// For NVIDIA these typically include PCI device/vendor IDs and the
23    /// driver version; for AMD, the ASIC name and PCIe topology; for TPU,
24    /// the chip revision.  Keys and values vary by vendor.
25    pub detail: HashMap<String, String>,
26
27    // ------------------------------------------------------------------
28    // Dynamic metrics - polled each interval.
29    // ------------------------------------------------------------------
30    /// Core utilisation (0.0–100.0).
31    pub utilization_pct: f64,
32    /// Total VRAM in bytes.
33    pub vram_total_bytes: u64,
34    /// Used VRAM in bytes.
35    pub vram_used_bytes: u64,
36    /// Fraction of VRAM in use (0.0–100.0).
37    pub vram_used_pct: f64,
38    /// Die temperature in degrees Celsius.
39    pub temperature_celsius: u32,
40    /// Power draw in watts.
41    pub power_watts: f64,
42    /// Core clock frequency in MHz.
43    pub frequency_mhz: u32,
44    /// Number of shader/compute cores, if reported by the driver.
45    pub core_count: Option<u32>,
46}