Skip to main content

resource_tracker/output/
csv.rs

1use crate::metrics::Sample;
2
/// Returns the CSV header line (comma-separated column names, no newline).
///
/// Column names follow the same `system_`/`process_` prefix convention as
/// Python resource-tracker.  The first 21 columns (`timestamp` plus the 20
/// `system_*` columns) cover host-wide metrics; the remaining 11 `process_*`
/// columns cover the tracked PID tree.
///
/// Unit notes:
///   system_cpu_usage    - fractional cores (0..N), same as Python
///   system_memory_*_mib - mebibytes (MiB = 1,048,576 bytes)
///   system_disk_*       - bytes per interval, same as Python
///   system_net_*        - bytes per interval, same as Python
///   system_disk_space_* - GB summed across all block-device mounts
///   system_gpu_vram_mib - MiB, same as Python
///   process_cpu_usage   - fractional cores consumed by tracked PID tree
///
/// Process fields not yet collected are emitted as empty strings in data rows.
pub fn csv_header() -> &'static str {
    // Each literal below is one logical group of columns; `concat!` glues them
    // into a single `&'static str` at compile time.
    concat!(
        "timestamp,",
        // Host-wide CPU.
        "system_processes,system_utime,system_stime,system_cpu_usage,",
        // Host-wide memory.
        "system_memory_free_mib,system_memory_used_mib,system_memory_buffers_mib,",
        "system_memory_cached_mib,system_memory_active_mib,system_memory_inactive_mib,",
        // Host-wide disk I/O and disk space.
        "system_disk_read_bytes,system_disk_write_bytes,",
        "system_disk_space_total_gb,system_disk_space_used_gb,system_disk_space_free_gb,",
        // Host-wide network I/O.
        "system_net_recv_bytes,system_net_sent_bytes,",
        // Host-wide GPU.
        "system_gpu_usage,system_gpu_vram_mib,system_gpu_utilized,",
        // Tracked PID tree.
        "process_pid,process_children,process_utime,process_stime,process_cpu_usage,",
        "process_memory_mib,process_disk_read_bytes,process_disk_write_bytes,",
        "process_gpu_usage,process_gpu_vram_mib,process_gpu_utilized",
    )
}
30
31/// Serialize a `Sample` as a single CSV row (no newline).
32///
33/// `interval_secs` is required to convert bytes/sec rates into per-interval
34/// byte counts, matching Python resource-tracker's convention.
35///
36/// Process fields not yet collected are emitted as empty strings.
37/// All process fields are empty when no PID is being tracked.
38pub fn sample_to_csv_row(s: &Sample, interval_secs: u64) -> String {
39    // system_cpu_usage: utilization_pct is already in fractional cores (0..N_cores)
40    let cpu_usage = s.cpu.utilization_pct;
41
42    // Disk I/O: per-interval byte counts (rate × interval ≈ bytes in this window)
43    let secs = f64::from(u32::try_from(interval_secs).unwrap_or(u32::MAX));
44    let disk_read: u64 = s
45        .disk
46        .iter()
47        .map(|d| (d.read_bytes_per_sec * secs) as u64)
48        .sum();
49    let disk_write: u64 = s
50        .disk
51        .iter()
52        .map(|d| (d.write_bytes_per_sec * secs) as u64)
53        .sum();
54
55    // Disk space: sum all mounts; used = total - free (includes root-reserved blocks)
56    let disk_space_total: f64 = s
57        .disk
58        .iter()
59        .flat_map(|d| d.mounts.iter())
60        .map(|m| m.total_bytes as f64 / 1_000_000_000.0)
61        .sum();
62    let disk_space_free: f64 = s
63        .disk
64        .iter()
65        .flat_map(|d| d.mounts.iter())
66        .map(|m| m.available_bytes as f64 / 1_000_000_000.0)
67        .sum();
68    let disk_space_used = disk_space_total - disk_space_free;
69
70    // Network I/O: per-interval byte counts
71    let net_recv: u64 = s
72        .network
73        .iter()
74        .map(|n| (n.rx_bytes_per_sec * secs) as u64)
75        .sum();
76    let net_sent: u64 = s
77        .network
78        .iter()
79        .map(|n| (n.tx_bytes_per_sec * secs) as u64)
80        .sum();
81
82    // GPU system aggregates
83    let gpu_usage: f64 = s.gpu.iter().map(|g| g.utilization_pct / 100.0).sum();
84    let gpu_vram: f64 = s
85        .gpu
86        .iter()
87        .map(|g| g.vram_used_bytes as f64 / 1_048_576.0)
88        .sum();
89    let gpu_utilized: u32 =
90        u32::try_from(s.gpu.iter().filter(|g| g.utilization_pct > 0.0).count()).unwrap_or(0);
91
92    // System columns (21): same layout and values as before, new names in header.
93    let system_row = format!(
94        "{},{},{:.3},{:.3},{:.4},{},{},{},{},{},{},{},{},{:.6},{:.6},{:.6},{},{},{:.4},{:.4},{}",
95        s.timestamp_secs,
96        s.cpu.process_count,
97        s.cpu.utime_secs,
98        s.cpu.stime_secs,
99        cpu_usage,
100        s.memory.free_mib,
101        s.memory.used_mib,
102        s.memory.buffers_mib,
103        s.memory.cached_mib,
104        s.memory.active_mib,
105        s.memory.inactive_mib,
106        disk_read,
107        disk_write,
108        disk_space_total,
109        disk_space_used,
110        disk_space_free,
111        net_recv,
112        net_sent,
113        gpu_usage,
114        gpu_vram,
115        gpu_utilized,
116    );
117
118    // Process columns (11): empty when not tracked or not yet collected.
119    let opt_u32 = |v: Option<u32>| v.map_or(String::new(), |x| x.to_string());
120    let opt_i32 = |v: Option<i32>| v.map_or(String::new(), |x| x.to_string());
121    let opt_f4 = |v: Option<f64>| v.map_or(String::new(), |x| format!("{x:.4}"));
122
123    let opt_u64 = |v: Option<u64>| v.map_or(String::new(), |x| x.to_string());
124
125    let process_row = [
126        opt_i32(s.tracked_pid),
127        opt_u32(s.cpu.process_child_count),
128        opt_f4(s.cpu.process_utime_secs),
129        opt_f4(s.cpu.process_stime_secs),
130        opt_f4(s.cpu.process_cores_used),
131        opt_u64(s.cpu.process_rss_mib),
132        opt_u64(s.cpu.process_disk_read_bytes),
133        opt_u64(s.cpu.process_disk_write_bytes),
134        opt_f4(s.cpu.process_gpu_usage),
135        opt_f4(s.cpu.process_gpu_vram_mib),
136        opt_u32(s.cpu.process_gpu_utilized),
137    ]
138    .join(",");
139
140    format!("{system_row},{process_row}")
141}
142
143// ---------------------------------------------------------------------------
144// Unit tests
145// ---------------------------------------------------------------------------
146
#[cfg(test)]
mod tests {
    use super::*;
    use crate::metrics::{CpuMetrics, DiskMetrics, DiskMountMetrics, MemoryMetrics, Sample};

    /// Split a CSV row into its individual fields.
    fn columns(row: &str) -> Vec<&str> {
        row.split(',').collect()
    }

    /// CPU metrics with system values populated and every process field unset.
    fn minimal_cpu() -> CpuMetrics {
        CpuMetrics {
            utilization_pct: 2.5,
            utime_secs: 1.234,
            stime_secs: 0.567,
            process_count: 42,
            per_core_pct: vec![],
            process_cores_used: None,
            process_child_count: None,
            process_utime_secs: None,
            process_stime_secs: None,
            process_rss_mib: None,
            process_disk_read_bytes: None,
            process_disk_write_bytes: None,
            process_gpu_usage: None,
            process_gpu_vram_mib: None,
            process_gpu_utilized: None,
            process_tree_pids: vec![],
        }
    }

    /// Memory metrics fixture shared by all tests.
    fn minimal_memory() -> MemoryMetrics {
        MemoryMetrics {
            total_mib: 8192,
            free_mib: 1000,
            available_mib: 2000,
            used_mib: 2000,
            used_pct: 25.0,
            buffers_mib: 100,
            cached_mib: 500,
            swap_total_mib: 0,
            swap_used_mib: 0,
            swap_used_pct: 0.0,
            active_mib: 1500,
            inactive_mib: 300,
        }
    }

    /// Sample with no tracked PID and no disk, network, or GPU entries.
    fn minimal_sample() -> Sample {
        Sample {
            timestamp_secs: 1_000_000,
            job_name: None,
            tracked_pid: None,
            cpu: minimal_cpu(),
            memory: minimal_memory(),
            network: vec![],
            disk: vec![],
            gpu: vec![],
        }
    }

    /// One idle disk ("sda") with a single 100 GB ext4 root mount, 40 GB available.
    fn single_mount_disk() -> DiskMetrics {
        let root_mount = DiskMountMetrics {
            mount_point: "/".to_string(),
            filesystem: "ext4".to_string(),
            total_bytes: 100_000_000_000,
            used_bytes: 60_000_000_000,
            available_bytes: 40_000_000_000,
            used_pct: 60.0,
        };
        DiskMetrics {
            device: "sda".to_string(),
            model: None,
            vendor: None,
            serial: None,
            device_type: None,
            capacity_bytes: None,
            mounts: vec![root_mount],
            read_bytes_per_sec: 0.0,
            write_bytes_per_sec: 0.0,
            read_bytes_total: 0,
            write_bytes_total: 0,
        }
    }

    // T-CSV-01: header starts with the timestamp column and contains no
    // embedded newlines.
    #[test]
    fn test_csv_header_is_first_line_no_embedded_newline() {
        let header = csv_header();
        assert!(
            header.starts_with("timestamp,"),
            "header must start with 'timestamp,'"
        );
        assert!(
            !header.contains('\n'),
            "header must not contain an embedded newline"
        );
    }

    // T-CSV-02: a data row has exactly as many columns as the header.
    #[test]
    fn test_csv_row_column_count_matches_header() {
        let row = sample_to_csv_row(&minimal_sample(), 1);
        let row_cols = columns(&row).len();
        let header_cols = columns(csv_header()).len();
        assert_eq!(
            row_cols, header_cols,
            "header has {header_cols} columns but row has {row_cols}: {row}"
        );
    }

    // T-CSV-03: system_cpu_usage column equals utilization_pct (already
    // fractional cores) to 4 dp.
    //
    // NOTE: The Specification.md table formula reads
    // "utilization_pct / 100 × total_cores", which is stale.  PR #1 Changelog
    // explicitly corrected this:
    //   "Was: utilization_pct / 100.0 * total_cores; Now: utilization_pct directly
    //    (field is already in fractional cores)."
    // The CpuMetrics field definition in the spec and in metrics/cpu.rs both
    // confirm utilization_pct is in range 0.0..N_cores, not 0.0..100.0.
    // This test verifies the actual (correct) behavior.
    #[test]
    fn test_csv_cpu_usage_is_utilization_pct_direct() {
        let mut sample = minimal_sample();
        sample.cpu.utilization_pct = 3.1415;
        let row = sample_to_csv_row(&sample, 1);
        // Column order: timestamp(0),system_processes(1),system_utime(2),
        //   system_stime(3),system_cpu_usage(4),...
        let cols = columns(&row);
        let cpu_usage = match cols[4].parse::<f64>() {
            Ok(value) => value,
            Err(_) => panic!("system_cpu_usage column is not numeric: {:?}", cols[4]),
        };
        let delta = (cpu_usage - 3.1415_f64).abs();
        assert!(
            delta < 0.00005,
            "system_cpu_usage {cpu_usage:.4} does not match utilization_pct 3.1415"
        );
    }

    // T-CSV-04: disk_space_used_gb == disk_space_total_gb - disk_space_free_gb.
    #[test]
    fn test_csv_disk_space_used_equals_total_minus_free() {
        let mut sample = minimal_sample();
        sample.disk = vec![single_mount_disk()];
        let row = sample_to_csv_row(&sample, 1);
        // Column order: ...system_disk_space_total_gb(13),system_disk_space_used_gb(14),
        //   system_disk_space_free_gb(15),...
        let cols = columns(&row);
        let total: f64 = cols[13].parse().unwrap();
        let used: f64 = cols[14].parse().unwrap();
        let free: f64 = cols[15].parse().unwrap();
        let expected_used = total - free;
        assert!(
            (used - expected_used).abs() < 1e-9,
            "disk_space_used_gb {used:.6} != total {total:.6} - free {free:.6}"
        );
    }

    // T-CSV-05: output is byte-for-byte reproducible for the same sample.
    #[test]
    fn test_csv_output_is_deterministic() {
        let sample = minimal_sample();
        let r1 = sample_to_csv_row(&sample, 1);
        let r2 = sample_to_csv_row(&sample, 1);
        assert_eq!(r1, r2, "csv row output is not deterministic");
    }

    // T-CSV-06: no quoted fields; header has no trailing comma.
    // Note: data rows may end with ',' when trailing process fields are empty
    // (no PID tracked).  This is valid CSV -- empty fields after the last comma
    // represent null values, not a formatting error.
    #[test]
    fn test_csv_no_trailing_commas_no_quoted_fields() {
        let row = sample_to_csv_row(&minimal_sample(), 1);
        assert!(!row.contains('"'), "double-quoted field in row: {row}");
        assert!(!row.contains('\''), "single-quoted field in row: {row}");

        let header = csv_header();
        assert!(!header.ends_with(','), "trailing comma in header");
        assert!(!header.contains('"'), "double-quoted field in header");
    }

    // T-CSV-07: process_gpu_usage, process_gpu_vram_mib, and process_gpu_utilized
    // are emitted at columns 29, 30, and 31 when set.
    #[test]
    fn test_csv_process_gpu_fields_emitted_when_set() {
        let mut sample = minimal_sample();
        sample.tracked_pid = Some(42);
        sample.cpu.process_gpu_usage = Some(0.55);
        sample.cpu.process_gpu_vram_mib = Some(83.1875);
        sample.cpu.process_gpu_utilized = Some(1);

        let row = sample_to_csv_row(&sample, 1);
        let cols = columns(&row);

        let expectations = [
            (29, "0.5500", "process_gpu_usage mismatch"),
            (30, "83.1875", "process_gpu_vram_mib mismatch"),
            (31, "1", "process_gpu_utilized mismatch"),
        ];
        for (index, expected, message) in expectations {
            assert_eq!(cols[index], expected, "{message}");
        }
    }

    // T-CSV-08: process GPU columns are empty strings when no PID is tracked.
    #[test]
    fn test_csv_process_gpu_fields_empty_when_untracked() {
        // tracked_pid = None, all process fields None
        let row = sample_to_csv_row(&minimal_sample(), 1);
        let cols = columns(&row);

        let expectations = [
            (29, "process_gpu_usage must be empty when None"),
            (30, "process_gpu_vram_mib must be empty when None"),
            (31, "process_gpu_utilized must be empty when None"),
        ];
        for (index, message) in expectations {
            assert_eq!(cols[index], "", "{message}");
        }
    }
}