Skip to main content

resource_tracker/collector/
host.rs

1use crate::metrics::{GpuMetrics, HostInfo};
2
3// ---------------------------------------------------------------------------
4// Host discovery helpers
5// ---------------------------------------------------------------------------
6
/// Best-effort stable identifier for this host.
///
/// Tries the DMI board asset tag first and falls back to the systemd
/// machine-id. Returns `None` when neither file yields a usable value.
fn read_host_id() -> Option<String> {
    // Trimmed contents of `path`, or `None` if the file is unreadable or blank.
    let read_trimmed = |path: &str| -> Option<String> {
        let raw = std::fs::read_to_string(path).ok()?;
        let value = raw.trim();
        if value.is_empty() {
            None
        } else {
            Some(value.to_owned())
        }
    };

    // AWS instances expose a stable asset tag at this DMI path; the
    // "Not Specified" placeholder is treated the same as a missing tag.
    read_trimmed("/sys/class/dmi/id/board_asset_tag")
        .filter(|tag| tag.as_str() != "Not Specified")
        // Generic fallback: systemd machine-id.
        .or_else(|| read_trimmed("/etc/machine-id"))
}
22
23fn read_host_name() -> Option<String> {
24    let mut buf = vec![0u8; 256];
25    let ret = unsafe { libc::gethostname(buf.as_mut_ptr() as *mut libc::c_char, buf.len()) };
26    if ret != 0 {
27        return None;
28    }
29    let len = buf.iter().position(|&b| b == 0).unwrap_or(buf.len());
30    String::from_utf8(buf[..len].to_vec())
31        .ok()
32        .filter(|s| !s.is_empty())
33}
34
35/// First non-loopback IPv4 address discovered via `getifaddrs(3)`.
36fn read_host_ip() -> Option<String> {
37    unsafe {
38        let mut ifap: *mut libc::ifaddrs = std::ptr::null_mut();
39        if libc::getifaddrs(&mut ifap) != 0 {
40            return None;
41        }
42        let mut result: Option<String> = None;
43        let mut ptr = ifap;
44        while !ptr.is_null() {
45            let ifa = &*ptr;
46            if !ifa.ifa_addr.is_null() {
47                let family = (*ifa.ifa_addr).sa_family as i32;
48                if family == libc::AF_INET {
49                    let addr = ifa.ifa_addr as *const libc::sockaddr_in;
50                    // s_addr is stored in network byte order in memory.
51                    // to_ne_bytes() on x86_64 returns the memory bytes directly,
52                    // which are already in dotted-decimal order.
53                    let bytes = (*addr).sin_addr.s_addr.to_ne_bytes();
54                    if bytes[0] != 127 {
55                        result = Some(format!(
56                            "{}.{}.{}.{}",
57                            bytes[0], bytes[1], bytes[2], bytes[3]
58                        ));
59                        break;
60                    }
61                }
62            }
63            ptr = ifa.ifa_next;
64        }
65        libc::freeifaddrs(ifap);
66        result
67    }
68}
69
/// Returns `(vcpu_count, cpu_model)` from a single pass over `/proc/cpuinfo`.
///
/// * `vcpu_count` is the number of lines starting with `processor`, or `None`
///   when there are no such lines.
/// * `cpu_model` is the trimmed value after the first `:` of the first
///   `model name` line that contains one, or `None` when no line qualifies.
///
/// Both elements are `None` when the file cannot be read.
fn read_vcpus_and_model() -> (Option<u32>, Option<String>) {
    let Ok(cpuinfo) = std::fs::read_to_string("/proc/cpuinfo") else {
        return (None, None);
    };

    let mut processors: u32 = 0;
    let mut first_model: Option<String> = None;
    for entry in cpuinfo.lines() {
        if entry.starts_with("processor") {
            processors += 1;
            continue;
        }
        // Only the first usable `model name` line is recorded.
        if first_model.is_some() || !entry.starts_with("model name") {
            continue;
        }
        if let Some((_, value)) = entry.split_once(':') {
            first_model = Some(value.trim().to_string());
        }
    }

    ((processors > 0).then_some(processors), first_model)
}
91
/// `MemTotal` from `/proc/meminfo` converted to MiB.
///
/// Returns `None` when the file cannot be read, when it has no line starting
/// with `MemTotal:`, or when the second whitespace-separated field of that
/// line does not parse as an unsigned integer.
fn read_memory_mib() -> Option<u64> {
    let meminfo = std::fs::read_to_string("/proc/meminfo").ok()?;
    let total_line = meminfo
        .lines()
        .find(|entry| entry.starts_with("MemTotal:"))?;
    // The value is in KiB; integer-divide by 1024 to get MiB.
    let kib = total_line.split_whitespace().nth(1)?.parse::<u64>().ok()?;
    Some(kib / 1024)
}
104
/// Sum of all non-loop, non-ram block device capacities in GB.
///
/// Walks `/sys/block`, skips entries whose name starts with `loop` or `ram`,
/// and adds up each remaining device's `size` file. Entries that cannot be
/// read or parsed are ignored. Returns `None` when `/sys/block` cannot be
/// listed or the total is not positive.
fn read_storage_gb() -> Option<f64> {
    // /sys/block/<dev>/size reports 512-byte sectors.
    const SECTOR_BYTES: f64 = 512.0;
    // Decimal gigabytes (10^9 bytes).
    const BYTES_PER_GB: f64 = 1_000_000_000.0;

    let total_gb: f64 = std::fs::read_dir("/sys/block")
        .ok()?
        .flatten()
        .map(|entry| entry.file_name().to_string_lossy().to_string())
        .filter(|device| !(device.starts_with("loop") || device.starts_with("ram")))
        .filter_map(|device| {
            let raw = std::fs::read_to_string(format!("/sys/block/{}/size", device)).ok()?;
            raw.trim().parse::<u64>().ok()
        })
        .map(|sectors| sectors as f64 * SECTOR_BYTES / BYTES_PER_GB)
        .sum();

    (total_gb > 0.0).then_some(total_gb)
}
126
127// ---------------------------------------------------------------------------
128// Public host collection
129// ---------------------------------------------------------------------------
130
131/// Collect all host-level metadata. Fast (no network I/O).
132/// Takes a snapshot of GPU info already gathered so GPU-derived fields
133/// (`host_gpu_model`, `host_gpu_count`, `host_gpu_vram_mib`) can be populated
134/// without re-querying the GPU driver.
135pub fn collect_host_info(gpus: &[GpuMetrics]) -> HostInfo {
136    let (host_vcpus, host_cpu_model) = read_vcpus_and_model();
137
138    let (host_gpu_model, host_gpu_count, host_gpu_vram_mib) = if gpus.is_empty() {
139        (None, None, None)
140    } else {
141        let model = Some(gpus[0].name.clone());
142        let count = u32::try_from(gpus.len()).ok();
143        let vram_mib: u64 = gpus.iter().map(|g| g.vram_total_bytes / 1_048_576).sum();
144        (model, count, Some(vram_mib))
145    };
146
147    HostInfo {
148        host_id: read_host_id(),
149        host_name: read_host_name(),
150        host_ip: read_host_ip(),
151        host_allocation: None, // heuristic TBD
152        host_vcpus,
153        host_cpu_model,
154        host_memory_mib: read_memory_mib(),
155        host_gpu_model,
156        host_gpu_count,
157        host_gpu_vram_mib,
158        host_storage_gb: read_storage_gb(),
159    }
160}
161
162// ---------------------------------------------------------------------------
163// Unit tests
164// ---------------------------------------------------------------------------
165
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    /// Bytes in one GiB; keeps the VRAM sizes below readable.
    const GIB: u64 = 1_073_741_824;

    /// Minimal `GpuMetrics` fixture: only the name and total VRAM vary, every
    /// other field is a fixed placeholder.
    fn fake_gpu(name: &str, vram_total_bytes: u64) -> GpuMetrics {
        GpuMetrics {
            uuid: String::from("test-uuid"),
            name: name.to_owned(),
            device_type: String::from("GPU"),
            host_id: String::from("0"),
            detail: HashMap::new(),
            utilization_pct: 0.0,
            vram_total_bytes,
            vram_used_bytes: 0,
            vram_used_pct: 0.0,
            temperature_celsius: 0,
            power_watts: 0.0,
            frequency_mhz: 0,
            core_count: None,
        }
    }

    // T-HOST-01: with no GPUs, every GPU-derived field is None.
    #[test]
    fn test_collect_host_info_no_gpus_returns_none_gpu_fields() {
        let host = collect_host_info(&[]);
        assert!(
            host.host_gpu_model.is_none(),
            "host_gpu_model must be None when no GPUs"
        );
        assert!(
            host.host_gpu_count.is_none(),
            "host_gpu_count must be None when no GPUs"
        );
        assert!(
            host.host_gpu_vram_mib.is_none(),
            "host_gpu_vram_mib must be None when no GPUs"
        );
    }

    // T-HOST-02: a single GPU populates model, count, and VRAM.
    #[test]
    fn test_collect_host_info_one_gpu_sets_fields() {
        // 8 GiB = 8192 MiB
        let gpus = [fake_gpu("TestGPU X100", 8 * GIB)];
        let host = collect_host_info(&gpus);
        assert_eq!(host.host_gpu_model.as_deref(), Some("TestGPU X100"));
        assert_eq!(host.host_gpu_count, Some(1));
        assert_eq!(host.host_gpu_vram_mib, Some(8192));
    }

    // T-HOST-03: two GPUs report count = 2 and the summed VRAM.
    #[test]
    fn test_collect_host_info_two_gpus_sums_vram() {
        // 4 GiB each, 8 GiB (8192 MiB) in total.
        let gpus = [fake_gpu("GPU A", 4 * GIB), fake_gpu("GPU B", 4 * GIB)];
        let host = collect_host_info(&gpus);
        assert_eq!(host.host_gpu_count, Some(2));
        assert_eq!(host.host_gpu_vram_mib, Some(8192));
    }

    // T-HOST-04: hostname is non-empty on any standard Linux host.
    #[test]
    fn test_collect_host_info_hostname_present() {
        let host = collect_host_info(&[]);
        assert!(
            matches!(host.host_name.as_deref(), Some(name) if !name.is_empty()),
            "host_name should be a non-empty string on a standard Linux host"
        );
    }

    // T-HOST-05: host_vcpus is present and positive.
    #[test]
    fn test_collect_host_info_vcpus_positive() {
        let host = collect_host_info(&[]);
        assert!(
            matches!(host.host_vcpus, Some(count) if count > 0),
            "host_vcpus must be > 0, got {:?}",
            host.host_vcpus
        );
    }
}