My CUDA version is 11.4 and my driver version is 450.191.x.
I call nvmlDeviceGetProcessUtilization to get per-process GPU utilization, and the result is wrong.
Here is my code:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <sys/time.h>
#include <nvml.h>
int main() {
nvmlReturn_t result;
nvmlDevice_t device;
unsigned int deviceCount;
unsigned int utilizationCount = 100;
nvmlProcessUtilizationSample_t *utilization = NULL;
unsigned long long lastSeenTimeStamp = 0;
result = nvmlInit();
if (result != NVML_SUCCESS) {
printf("Failed to initialize NVML: %s\n", nvmlErrorString(result));
return 1;
}
result = nvmlDeviceGetCount(&deviceCount);
if (result != NVML_SUCCESS) {
printf("Failed to get device count: %s\n", nvmlErrorString(result));
nvmlShutdown();
return 1;
}
for (unsigned int i = 0; i < deviceCount; i++) {
result = nvmlDeviceGetHandleByIndex(i, &device);
if (result != NVML_SUCCESS) {
printf("Failed to get handle for device %d: %s\n", i, nvmlErrorString(result));
continue;
}
printf("%d\n", utilizationCount); // ------> first get process count
result = nvmlDeviceGetProcessUtilization(device, utilization, &utilizationCount, lastSeenTimeStamp);
printf("%d\n", utilizationCount); // ------> second get process count
if (result == NVML_ERROR_INSUFFICIENT_SIZE) {
utilization = (nvmlProcessUtilizationSample_t *)malloc(utilizationCount * sizeof(nvmlProcessUtilizationSample_t));
if (utilization == NULL) {
printf("Failed to allocate memory for process utilization samples.\n");
nvmlShutdown();
return 1;
}
}
time_t nowt = time(NULL);
lastSeenTimeStamp = ((int)nowt - 1) * pow(10, 6);
printf("%d\n", utilizationCount); // ------> first get process count
result = nvmlDeviceGetProcessUtilization(device, utilization, &utilizationCount, lastSeenTimeStamp);
printf("%d %s\n", utilizationCount, nvmlErrorString(result)); // ------> second get process count
if (result == NVML_SUCCESS) {
for (unsigned int j = 0; j < utilizationCount; j++) {
printf(" PID: %u\n", utilization[j].pid);
printf(" GPU Utilization: %u%%\n", utilization[j].smUtil);
printf(" Memory Utilization: %u%%\n", utilization[j].memUtil);
printf("--------------------------\n");
}
} else if (result == NVML_ERROR_INSUFFICIENT_SIZE) {
printf("Buffer too small. Increase utilizationCount.\n");
} else {
printf("Failed to get process utilization for device %d: %s\n", i, nvmlErrorString(result));
}
if (utilization != NULL) {
free(utilization);
utilization = NULL;
}
}
nvmlShutdown();
return 0;
}
The output is as follows:
Every 0.5s: ./n1 n147-167-074: Fri Mar 21 18:20:51 2025
device: 0
4
100
100
1 Success
PID: 650092
GPU Utilization: 98%
Memory Utilization: 83%
--------------------------
device: 1
4
100
100
100 Success
PID: 3593859416
GPU Utilization: 3482903712%
Memory Utilization: 21961%
--------------------------
PID: 0
GPU Utilization: 0%
Memory Utilization: 0%
--------------------------
PID: 0
GPU Utilization: 0%
Memory Utilization: 0%
--------------------------
PID: 0
GPU Utilization: 4294967295%
Memory Utilization: 0%
--------------------------
PID: 4294901760
GPU Utilization: 4294967295%
Memory Utilization: 4294967295%
--------------------------
PID: 3482903968
GPU Utilization: 0%
Memory Utilization: 0%
I am running one process on device 0 and no process on device 1. Why is the second printed process count 100 with a return value of SUCCESS, which results in wrong PIDs and wrong utilization values? Is it related to a mismatch between the CUDA version and the driver version?