resource_tracker/output/
csv.rs

1use crate::metrics::Sample;
2
/// Returns the CSV header line (no trailing newline) for rows produced by
/// `sample_to_csv_row`.
///
/// Column names follow the same `system_`/`process_` prefix convention as
/// Python resource-tracker.  The first 21 columns are the timestamp plus 20
/// host-wide `system_*` metrics; the remaining 11 `process_*` columns cover
/// the tracked PID tree.
///
/// Unit notes:
///   system_cpu_usage    - fractional cores (0..N), same as Python
///   system_memory_*_mib - mebibytes (MiB = 1,048,576 bytes)
///   system_disk_*       - bytes per interval, same as Python
///   system_net_*        - bytes per interval, same as Python
///   system_disk_space_* - GB summed across all block-device mounts
///   system_gpu_vram_mib - MiB, same as Python
///   process_cpu_usage   - fractional cores consumed by tracked PID tree
///
/// Process fields not yet collected are emitted as empty strings in the rows.
pub fn csv_header() -> &'static str {
    // One literal per metric family; `concat!` glues them into a single
    // compile-time string with no embedded whitespace or newlines.
    concat!(
        "timestamp,",
        // CPU and process counters
        "system_processes,system_utime,system_stime,system_cpu_usage,",
        // Memory
        "system_memory_free_mib,system_memory_used_mib,system_memory_buffers_mib,",
        "system_memory_cached_mib,system_memory_active_mib,system_memory_inactive_mib,",
        // Disk I/O and disk space
        "system_disk_read_bytes,system_disk_write_bytes,",
        "system_disk_space_total_gb,system_disk_space_used_gb,system_disk_space_free_gb,",
        // Network
        "system_net_recv_bytes,system_net_sent_bytes,",
        // GPU
        "system_gpu_usage,system_gpu_vram_mib,system_gpu_utilized,",
        // Tracked PID tree
        "process_pid,process_children,process_utime,process_stime,process_cpu_usage,",
        "process_memory_mib,process_disk_read_bytes,process_disk_write_bytes,",
        "process_gpu_usage,process_gpu_vram_mib,process_gpu_utilized",
    )
}
30
31/// Serialize a `Sample` as a single CSV row (no newline).
32///
33/// `interval_secs` is required to convert bytes/sec rates into per-interval
34/// byte counts, matching Python resource-tracker's convention.
35///
36/// Process fields not yet collected are emitted as empty strings.
37/// All process fields are empty when no PID is being tracked.
38pub fn sample_to_csv_row(s: &Sample, interval_secs: u64) -> String {
39    // system_cpu_usage: utilization_pct is already in fractional cores (0..N_cores)
40    let cpu_usage = s.cpu.utilization_pct;
41
42    // Disk I/O: per-interval byte counts (rate × interval ≈ bytes in this window)
43    let secs = f64::from(u32::try_from(interval_secs).unwrap_or(u32::MAX));
44    let disk_read: u64 = s
45        .disk
46        .iter()
47        .map(|d| (d.read_bytes_per_sec * secs) as u64)
48        .sum();
49    let disk_write: u64 = s
50        .disk
51        .iter()
52        .map(|d| (d.write_bytes_per_sec * secs) as u64)
53        .sum();
54
55    // Disk space: sum all mounts; used = total - free (includes root-reserved blocks)
56    let disk_space_total: f64 = s
57        .disk
58        .iter()
59        .flat_map(|d| d.mounts.iter())
60        .map(|m| m.total_bytes as f64 / 1_000_000_000.0)
61        .sum();
62    let disk_space_free: f64 = s
63        .disk
64        .iter()
65        .flat_map(|d| d.mounts.iter())
66        .map(|m| m.available_bytes as f64 / 1_000_000_000.0)
67        .sum();
68    let disk_space_used = disk_space_total - disk_space_free;
69
70    // Network I/O: per-interval byte counts
71    let net_recv: u64 = s
72        .network
73        .iter()
74        .map(|n| (n.rx_bytes_per_sec * secs) as u64)
75        .sum();
76    let net_sent: u64 = s
77        .network
78        .iter()
79        .map(|n| (n.tx_bytes_per_sec * secs) as u64)
80        .sum();
81
82    // GPU system aggregates
83    let gpu_usage: f64 = s.gpu.iter().map(|g| g.utilization_pct / 100.0).sum();
84    let gpu_vram: f64 = s
85        .gpu
86        .iter()
87        .map(|g| g.vram_used_bytes as f64 / 1_048_576.0)
88        .sum();
89    let gpu_utilized: u32 =
90        u32::try_from(s.gpu.iter().filter(|g| g.utilization_pct > 0.0).count()).unwrap_or(0);
91
92    // System columns (21): same layout and values as before, new names in header.
93    let system_row = format!(
94        "{},{},{:.3},{:.3},{:.4},{},{},{},{},{},{},{},{},{:.6},{:.6},{:.6},{},{},{:.4},{:.4},{}",
95        s.timestamp_secs,
96        s.cpu.process_count,
97        s.cpu.utime_secs,
98        s.cpu.stime_secs,
99        cpu_usage,
100        s.memory.free_mib,
101        s.memory.used_mib,
102        s.memory.buffers_mib,
103        s.memory.cached_mib,
104        s.memory.active_mib,
105        s.memory.inactive_mib,
106        disk_read,
107        disk_write,
108        disk_space_total,
109        disk_space_used,
110        disk_space_free,
111        net_recv,
112        net_sent,
113        gpu_usage,
114        gpu_vram,
115        gpu_utilized,
116    );
117
118    // Process columns (11): empty when not tracked or not yet collected.
119    let opt_u32 = |v: Option<u32>| v.map_or(String::new(), |x| x.to_string());
120    let opt_i32 = |v: Option<i32>| v.map_or(String::new(), |x| x.to_string());
121    let opt_f4 = |v: Option<f64>| v.map_or(String::new(), |x| format!("{x:.4}"));
122
123    let opt_u64 = |v: Option<u64>| v.map_or(String::new(), |x| x.to_string());
124
125    let process_row = [
126        opt_i32(s.tracked_pid),
127        opt_u32(s.cpu.process_child_count),
128        opt_f4(s.cpu.process_utime_secs),
129        opt_f4(s.cpu.process_stime_secs),
130        opt_f4(s.cpu.process_cores_used),
131        opt_u64(s.cpu.process_pss_mib),
132        opt_u64(s.cpu.process_disk_read_bytes),
133        opt_u64(s.cpu.process_disk_write_bytes),
134        opt_f4(s.cpu.process_gpu_usage),
135        opt_f4(s.cpu.process_gpu_vram_mib),
136        opt_u32(s.cpu.process_gpu_utilized),
137    ]
138    .join(",");
139
140    format!("{system_row},{process_row}")
141}
142
143// ---------------------------------------------------------------------------
144// Unit tests
145// ---------------------------------------------------------------------------
146
#[cfg(test)]
mod tests {
    use super::*;
    use crate::metrics::{CpuMetrics, DiskMetrics, DiskMountMetrics, MemoryMetrics, Sample};

    /// Split a CSV row into its comma-separated fields.
    fn split_fields(row: &str) -> Vec<&str> {
        row.split(',').collect()
    }

    /// Baseline sample: no tracked PID, every process field `None`, and no
    /// network, disk, or GPU devices.
    fn minimal_sample() -> Sample {
        let cpu = CpuMetrics {
            utilization_pct: 2.5,
            utime_secs: 1.234,
            stime_secs: 0.567,
            process_count: 42,
            per_core_pct: Vec::new(),
            process_cores_used: None,
            process_child_count: None,
            process_utime_secs: None,
            process_stime_secs: None,
            process_pss_mib: None,
            process_rss_mib: None,
            process_disk_read_bytes: None,
            process_disk_write_bytes: None,
            process_gpu_usage: None,
            process_gpu_vram_mib: None,
            process_gpu_utilized: None,
            process_tree_pids: Vec::new(),
        };
        let memory = MemoryMetrics {
            total_mib: 8192,
            free_mib: 1000,
            available_mib: 2000,
            used_mib: 2000,
            used_pct: 25.0,
            buffers_mib: 100,
            cached_mib: 500,
            swap_total_mib: 0,
            swap_used_mib: 0,
            swap_used_pct: 0.0,
            active_mib: 1500,
            inactive_mib: 300,
        };
        Sample {
            timestamp_secs: 1_000_000,
            job_name: None,
            tracked_pid: None,
            cpu,
            memory,
            network: Vec::new(),
            disk: Vec::new(),
            gpu: Vec::new(),
        }
    }

    // T-CSV-01: the header starts with the timestamp column and is a single line.
    #[test]
    fn test_csv_header_is_first_line_no_embedded_newline() {
        let header = csv_header();
        assert!(
            header.starts_with("timestamp,"),
            "header must start with 'timestamp,'"
        );
        assert!(
            !header.contains('\n'),
            "header must not contain an embedded newline"
        );
    }

    // T-CSV-02: a data row has exactly as many columns as the header.
    #[test]
    fn test_csv_row_column_count_matches_header() {
        let expected = csv_header().split(',').count();
        let row = sample_to_csv_row(&minimal_sample(), 1);
        let actual = row.split(',').count();
        assert_eq!(
            actual, expected,
            "header has {expected} columns but row has {actual}: {row}"
        );
    }

    // T-CSV-03: system_cpu_usage equals utilization_pct (already fractional
    // cores), printed to 4 decimal places.
    //
    // NOTE: the Specification.md table formula "utilization_pct / 100 × total_cores"
    // is stale.  The PR #1 changelog explicitly corrected it:
    //   "Was: utilization_pct / 100.0 * total_cores; Now: utilization_pct directly
    //    (field is already in fractional cores)."
    // The CpuMetrics field definition in the spec and in metrics/cpu.rs both state
    // that utilization_pct ranges over 0.0..N_cores, not 0.0..100.0, so this test
    // pins the actual (correct) behavior.
    #[test]
    fn test_csv_cpu_usage_is_utilization_pct_direct() {
        let mut input = minimal_sample();
        input.cpu.utilization_pct = 3.1415;

        let row = sample_to_csv_row(&input, 1);
        // Column order: timestamp(0), system_processes(1), system_utime(2),
        //   system_stime(3), system_cpu_usage(4), ...
        let cols = split_fields(&row);
        let reported = match cols[4].parse::<f64>() {
            Ok(value) => value,
            Err(_) => panic!("system_cpu_usage column is not numeric: {:?}", cols[4]),
        };
        assert!(
            (reported - 3.1415_f64).abs() < 0.00005,
            "system_cpu_usage {reported:.4} does not match utilization_pct 3.1415"
        );
    }

    // T-CSV-04: disk_space_used_gb == disk_space_total_gb - disk_space_free_gb.
    #[test]
    fn test_csv_disk_space_used_equals_total_minus_free() {
        let root_mount = DiskMountMetrics {
            mount_point: "/".to_string(),
            filesystem: "ext4".to_string(),
            total_bytes: 100_000_000_000,
            used_bytes: 60_000_000_000,
            available_bytes: 40_000_000_000,
            used_pct: 60.0,
        };
        let mut input = minimal_sample();
        input.disk = vec![DiskMetrics {
            device: "sda".to_string(),
            model: None,
            vendor: None,
            serial: None,
            device_type: None,
            capacity_bytes: None,
            mounts: vec![root_mount],
            read_bytes_per_sec: 0.0,
            write_bytes_per_sec: 0.0,
            read_bytes_total: 0,
            write_bytes_total: 0,
        }];

        let row = sample_to_csv_row(&input, 1);
        // Column order: ... system_disk_space_total_gb(13),
        //   system_disk_space_used_gb(14), system_disk_space_free_gb(15), ...
        let cols = split_fields(&row);
        let [total, used, free] = [13, 14, 15].map(|i: usize| cols[i].parse::<f64>().unwrap());
        assert!(
            (used - (total - free)).abs() < 1e-9,
            "disk_space_used_gb {used:.6} != total {total:.6} - free {free:.6}"
        );
    }

    // T-CSV-05: serializing the same sample twice yields byte-identical rows.
    #[test]
    fn test_csv_output_is_deterministic() {
        let input = minimal_sample();
        let first = sample_to_csv_row(&input, 1);
        let second = sample_to_csv_row(&input, 1);
        assert_eq!(first, second, "csv row output is not deterministic");
    }

    // T-CSV-06: no quoted fields; header has no trailing comma.
    // Note: data rows may end with ',' when the trailing process fields are
    // empty (no PID tracked).  That is valid CSV -- empty fields after the last
    // comma represent null values, not a formatting error.
    #[test]
    fn test_csv_no_trailing_commas_no_quoted_fields() {
        let row = sample_to_csv_row(&minimal_sample(), 1);
        assert!(!row.contains('"'), "double-quoted field in row: {row}");
        assert!(!row.contains('\''), "single-quoted field in row: {row}");

        let header = csv_header();
        assert!(!header.ends_with(','), "trailing comma in header");
        assert!(!header.contains('"'), "double-quoted field in header");
    }

    // T-CSV-07: process_gpu_usage, process_gpu_vram_mib, and process_gpu_utilized
    // are emitted at columns 29, 30, and 31 when set.
    #[test]
    fn test_csv_process_gpu_fields_emitted_when_set() {
        let mut input = minimal_sample();
        input.tracked_pid = Some(42);
        input.cpu.process_gpu_usage = Some(0.55);
        input.cpu.process_gpu_vram_mib = Some(83.1875);
        input.cpu.process_gpu_utilized = Some(1);

        let row = sample_to_csv_row(&input, 1);
        let cols = split_fields(&row);

        let expectations = [
            (29_usize, "0.5500", "process_gpu_usage mismatch"),
            (30, "83.1875", "process_gpu_vram_mib mismatch"),
            (31, "1", "process_gpu_utilized mismatch"),
        ];
        for (index, expected, message) in expectations {
            assert_eq!(cols[index], expected, "{message}");
        }
    }

    // T-CSV-08: process GPU columns are empty strings when no PID is tracked.
    #[test]
    fn test_csv_process_gpu_fields_empty_when_untracked() {
        // tracked_pid is None and every process field is None in the baseline.
        let row = sample_to_csv_row(&minimal_sample(), 1);
        let cols = split_fields(&row);

        let expectations = [
            (29_usize, "process_gpu_usage must be empty when None"),
            (30, "process_gpu_vram_mib must be empty when None"),
            (31, "process_gpu_utilized must be empty when None"),
        ];
        for (index, message) in expectations {
            assert_eq!(cols[index], "", "{message}");
        }
    }
}
resource_tracker/output/csv.rs

resource_tracker/output/
csv.rs