Detect server hardware (CPU count, memory amount, disk space, GPU count and VRAM amount) via procfs or psutil, and nvidia-smi.
Functions:
get_total_memory_mib
Get total system memory in MiB from /proc/meminfo or using psutil.
Source code in resource_tracker/server_info.py
def get_total_memory_mib() -> float:
    """Get total system memory in MiB from `/proc/meminfo` or using `psutil`.

    Detection is best-effort: `/proc/meminfo` is tried first, then `psutil`
    (imported lazily so it stays an optional dependency). Any failure in
    either source is swallowed and the next one is tried.

    Returns:
        Total memory in MiB rounded to two decimals, or `0.0` when neither
        source is usable.
    """
    with suppress(Exception):
        with open("/proc/meminfo", "r") as f:
            for line in f:
                # Anchor on the exact field name so an unrelated line that
                # merely contains "MemTotal" cannot be picked up by mistake.
                if line.startswith("MemTotal:"):
                    _, _, value = line.partition(":")
                    # The value looks like "  16384256 kB"; first token is KiB.
                    kib = int(value.split()[0])
                    return round(kib / 1024, 2)
    with suppress(Exception):
        from psutil import virtual_memory

        return round(virtual_memory().total / (1024**2), 2)
    # Keep the fallback a float so the return type matches the annotation.
    return 0.0
|
get_gpu_info
Get GPU information using the nvidia-smi command.
Returns:
| Type |
Description |
dict
|
A dictionary containing GPU information:
count: Number of GPUs
memory_mib: Total VRAM in MiB
gpu_names: List of GPU names
|
Source code in resource_tracker/server_info.py
def get_gpu_info() -> dict:
    """Get GPU information using the `nvidia-smi` command.

    Detection is best-effort: if `nvidia-smi` is missing or exits with an
    error, the zeroed defaults are returned instead of raising.

    Returns:
        A dictionary containing GPU information:

        - `count`: Number of GPUs (always equal to `len(gpu_names)`)
        - `memory_mib`: Total VRAM in MiB, summed over the GPUs whose
          memory field is numeric
        - `gpu_names`: List of GPU names
    """
    result = {"count": 0, "memory_mib": 0, "gpu_names": []}
    with suppress(Exception):
        nvidia_smi_output = check_output(
            [
                "nvidia-smi",
                "--query-gpu=gpu_name,memory.total",
                "--format=csv,noheader,nounits",
            ],
            universal_newlines=True,
        )
        for line in nvidia_smi_output.splitlines():
            # Blank lines (including entirely empty output) are not GPUs.
            if not line.strip():
                continue
            # Split on the LAST comma: the memory column is the final field,
            # so a comma inside the GPU name cannot shift the columns.
            fields = line.rsplit(",", 1)
            result["gpu_names"].append(fields[0].strip())
            # Count only rows actually recorded so `count` and `gpu_names`
            # can never disagree.
            result["count"] += 1
            memory_field = fields[1].strip() if len(fields) == 2 else ""
            # A non-numeric memory value must not discard the GPU itself;
            # only its VRAM contribution is skipped.
            with suppress(ValueError):
                result["memory_mib"] += float(memory_field)
    return result
|
get_server_info
Collects important information about the Linux server.
Returns:
| Type |
Description |
dict
|
A dictionary containing server information:
os: Operating system
vcpus: Number of virtual CPUs
memory_mib: Total memory in MiB
gpu_count: Number of GPUs (0 if not available)
gpu_names: List of GPU names ([] if not available)
gpu_name: Most common GPU name ("" if not available)
gpu_memory_mib: Total VRAM in MiB (0 if not available)
|
Source code in resource_tracker/server_info.py
def get_server_info() -> dict:
    """Gather a summary of the host's operating system and hardware.

    Returns:
        A dictionary describing the server:

        - `os`: Operating system
        - `vcpus`: Number of virtual CPUs
        - `memory_mib`: Total memory in MiB
        - `gpu_count`: Number of GPUs (`0` if not available)
        - `gpu_names`: List of GPU names (`[]` if not available)
        - `gpu_name`: Most common GPU name (`""` if not available)
        - `gpu_memory_mib`: Total VRAM in MiB (`0` if not available)
    """
    gpus = get_gpu_info()
    names = gpus["gpu_names"]

    # Pick the most frequent model as the representative GPU name;
    # with no GPUs detected there is nothing to pick from.
    if names:
        (dominant_name, _occurrences), = Counter(names).most_common(1)
    else:
        dominant_name = ""

    return {
        "os": system(),
        "vcpus": cpu_count(),
        "memory_mib": get_total_memory_mib(),
        "gpu_count": gpus["count"],
        "gpu_names": names,
        "gpu_name": dominant_name,
        "gpu_memory_mib": gpus["memory_mib"],
    }
|