Graphics Card Memory Usage in Linux

Graphics card memory usage in Linux

If you just need to know it for 3D graphics development purposes, you may want to look into something like gDEBugger or, if you only care about NVIDIA cards, you can try NVIDIA PerfHUD. I have not used them myself, but I would expect them to track such information.

How can I tell the graphics card's memory usage?

How about the OpenGL debugger?

Measuring peak nvidia GPU memory usage on linux

Not sure, but you can try something like this:

# Poll nvidia-smi twice a second and print every new peak of used GPU memory.
# The printed value is whatever nvidia-smi reports for memory.used with the
# unit suffix removed.
peak=0
while true; do
    # noheader,nounits yields one bare number per GPU; keep the largest so the
    # comparison below also works on multi-GPU machines.
    used=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits | sort -n | tail -n 1)
    case $used in
        # Empty or non-numeric output (e.g. nvidia-smi failed): skip this sample.
        '' | *[!0-9]*) ;;
        *)
            if [ "$used" -gt "$peak" ]; then
                peak=$used
                echo "$peak"
            fi
            ;;
    esac
    sleep .5
done

Reading raw GPU memory from userspace application

The solution is to use the Vulkan API to allocate a heap on the GPU and access it. However, since x86 cannot cache MMIO addresses, every access goes to the GPU over PCIe.

The implementation has about the same latency as Nvidia's server solution.

Here is a quick and dirty implementation in C++ that abstracts the GPU as a heap memory and allows malloc() and free() on it.

To find out the heap types, check: http://vulkan.gpuinfo.org/displayreport.php?id=14928#memory

You need that report to check which memory property flags your GPU supports before calling findMemoryType() from createVertexBuffer().

#include <vulkan/vulkan.h>

#include <algorithm>
#include <array>
#include <cassert>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
#include <optional>
#include <set>
#include <stdexcept>
#include <string>
#include <vector>

#include "libvram/libvram.hh"
// Forward declaration so the global pointer below can be declared before the
// class definition.
class VRamWrapper;

// Process-wide wrapper instance; ctor_libvram() assigns it and the
// libvram::malloc()/libvram::free() entry points dereference it.
VRamWrapper *vrw_obj;

// Number of entries in deviceExtensions.
const size_t DEV_EXT_LEN = 1;
// Device extensions a physical device must support to be selected; the same
// list is enabled when the logical device is created.
const char *deviceExtensions[] = {VK_KHR_SWAPCHAIN_EXTENSION_NAME};

/// Queue family indices discovered for a physical device.
struct QueueFamilyIndices {
  /// Index of a queue family with graphics support; empty until one is found.
  std::optional<uint32_t> graphicsFamily;

  /// @return true once every required queue family index has been filled in.
  /// Const so the query also works through const references.
  bool isComplete() const { return graphicsFamily.has_value(); }
};

class VRamWrapper {
public:
void init() { initVulkan(); }

void *malloc(size_t bytes) { return this->createVertexBuffer(bytes); }
void free(void *buf) { assert(0); }

private:
VkInstance instance;

VkPhysicalDevice physicalDevice = VK_NULL_HANDLE;
VkDevice device;

VkQueue graphicsQueue;

std::vector<VkBuffer> buffers;
std::vector<VkDeviceMemory> bufferMemories;

void initVulkan() {
createInstance();
pickPhysicalDevice();
createLogicalDevice();
}

void cleanup() {
for (auto buf : buffers) {
vkDestroyBuffer(device, buf, nullptr);
}

for (auto mem : bufferMemories) {
vkFreeMemory(device, mem, nullptr);
}

vkDestroyDevice(device, nullptr);
vkDestroyInstance(instance, nullptr);
}

void createInstance() {
VkApplicationInfo appInfo{};
appInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
appInfo.pApplicationName = "Hello Triangle";
appInfo.applicationVersion = VK_MAKE_VERSION(1, 0, 0);
appInfo.pEngineName = "No Engine";
appInfo.engineVersion = VK_MAKE_VERSION(1, 0, 0);
appInfo.apiVersion = VK_API_VERSION_1_0;

VkInstanceCreateInfo createInfo{};
createInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
createInfo.pApplicationInfo = &appInfo;

createInfo.enabledLayerCount = 0;

createInfo.pNext = nullptr;

if (vkCreateInstance(&createInfo, nullptr, &instance) != VK_SUCCESS) {
throw std::runtime_error("failed to create instance!");
}
}

void pickPhysicalDevice() {
uint32_t deviceCount = 0;
vkEnumeratePhysicalDevices(instance, &deviceCount, nullptr);

if (deviceCount == 0) {
throw std::runtime_error("failed to find GPUs with Vulkan support!");
}

std::vector<VkPhysicalDevice> devices(deviceCount);
vkEnumeratePhysicalDevices(instance, &deviceCount, devices.data());

for (const auto &device : devices) {
if (isDeviceSuitable(device)) {
physicalDevice = device;
break;
}
}

if (physicalDevice == VK_NULL_HANDLE) {
throw std::runtime_error("failed to find a suitable GPU!");
}
}

void createLogicalDevice() {
QueueFamilyIndices indices = findQueueFamilies(physicalDevice);

std::vector<VkDeviceQueueCreateInfo> queueCreateInfos;
std::set<uint32_t> uniqueQueueFamilies = {indices.graphicsFamily.value()};

float queuePriority = 1.0f;
for (uint32_t queueFamily : uniqueQueueFamilies) {
VkDeviceQueueCreateInfo queueCreateInfo{};
queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
queueCreateInfo.queueFamilyIndex = queueFamily;
queueCreateInfo.queueCount = 1;
queueCreateInfo.pQueuePriorities = &queuePriority;
queueCreateInfos.push_back(queueCreateInfo);
}

VkPhysicalDeviceFeatures deviceFeatures{};

VkDeviceCreateInfo createInfo{};
createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;

createInfo.queueCreateInfoCount =
static_cast<uint32_t>(queueCreateInfos.size());
createInfo.pQueueCreateInfos = queueCreateInfos.data();

createInfo.pEnabledFeatures = &deviceFeatures;

createInfo.enabledExtensionCount = static_cast<uint32_t>(DEV_EXT_LEN);
createInfo.ppEnabledExtensionNames = deviceExtensions;

createInfo.enabledLayerCount = 0;

if (vkCreateDevice(physicalDevice, &createInfo, nullptr, &device) !=
VK_SUCCESS) {
throw std::runtime_error("failed to create logical device!");
}

vkGetDeviceQueue(device, indices.graphicsFamily.value(), 0, &graphicsQueue);
}

void *createVertexBuffer(size_t bytes) {
VkBuffer buffer;
VkDeviceMemory bufferMemory;

VkBufferCreateInfo bufferInfo{};
bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
bufferInfo.size = bytes;
bufferInfo.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
bufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;

if (vkCreateBuffer(device, &bufferInfo, nullptr, &buffer) != VK_SUCCESS) {
throw std::runtime_error("failed to create vertex buffer!");
}

VkMemoryRequirements memRequirements;
vkGetBufferMemoryRequirements(device, buffer, &memRequirements);

assert(memRequirements.size == bytes);

VkMemoryAllocateInfo allocInfo{};
allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
allocInfo.allocationSize = memRequirements.size;
allocInfo.memoryTypeIndex =
findMemoryType(memRequirements.memoryTypeBits,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);

if (auto res = vkAllocateMemory(device, &allocInfo, nullptr, &bufferMemory);
res != VK_SUCCESS) {
throw std::runtime_error("failed to allocate vertex buffer memory");
}

vkBindBufferMemory(device, buffer, bufferMemory, 0);

void *data;
auto res = vkMapMemory(device, bufferMemory, 0, bytes, 0, &data);
if (res != VK_SUCCESS) {
throw std::runtime_error("Map failed");
}

fprintf(stderr, "Map completed. Allocated %lu MiB at %p\n",
(bytes) / (1024UL * 1024), data);

this->buffers.push_back(buffer);
this->bufferMemories.push_back(bufferMemory);

return data;
}

uint32_t findMemoryType(uint32_t typeFilter,
VkMemoryPropertyFlags properties) {
VkPhysicalDeviceMemoryProperties memProperties;
vkGetPhysicalDeviceMemoryProperties(physicalDevice, &memProperties);

for (uint32_t i = 0; i < memProperties.memoryTypeCount; i++) {
if ((typeFilter & (1 << i)) &&
(memProperties.memoryTypes[i].propertyFlags & properties) ==
properties) {
return i;
}
}

throw std::runtime_error("failed to find suitable memory type!");
}

bool isDeviceSuitable(VkPhysicalDevice device) {
QueueFamilyIndices indices = findQueueFamilies(device);

bool extensionsSupported = checkDeviceExtensionSupport(device);

return indices.isComplete() &&
extensionsSupported /* && swapChainAdequate */;
}

bool checkDeviceExtensionSupport(VkPhysicalDevice device) {
uint32_t extensionCount;
vkEnumerateDeviceExtensionProperties(device, nullptr, &extensionCount,
nullptr);

std::vector<VkExtensionProperties> availableExtensions(extensionCount);
vkEnumerateDeviceExtensionProperties(device, nullptr, &extensionCount,
availableExtensions.data());

std::set<std::string> requiredExtensions(deviceExtensions,
deviceExtensions + DEV_EXT_LEN);

for (const auto &extension : availableExtensions) {
requiredExtensions.erase(extension.extensionName);
}

return requiredExtensions.empty();
}

QueueFamilyIndices findQueueFamilies(VkPhysicalDevice device) {
QueueFamilyIndices indices;

uint32_t queueFamilyCount = 0;
vkGetPhysicalDeviceQueueFamilyProperties(device, &queueFamilyCount,
nullptr);

std::vector<VkQueueFamilyProperties> queueFamilies(queueFamilyCount);
vkGetPhysicalDeviceQueueFamilyProperties(device, &queueFamilyCount,
queueFamilies.data());

int i = 0;
for (const auto &queueFamily : queueFamilies) {
if (queueFamily.queueFlags & VK_QUEUE_GRAPHICS_BIT) {
indices.graphicsFamily = i;
}

if (indices.isComplete()) {
break;
}

i++;
}

return indices;
}
};

void ctor_libvram() {
fprintf(stderr, "%s() called\n", __FUNCTION__);
vrw_obj = new VRamWrapper();
vrw_obj->init();
}

/// Allocates `bytes` of GPU memory through the global VRamWrapper.
/// @return host pointer returned by VRamWrapper::malloc().
/// @throws std::runtime_error if the global wrapper has not been set up
///         (ctor_libvram() has not run), plus anything the wrapper throws.
void *libvram::malloc(size_t bytes) {
  // Fail loudly instead of dereferencing a null global.
  if (vrw_obj == nullptr) {
    throw std::runtime_error("libvram is not initialized");
  }
  return vrw_obj->malloc(bytes);
}

/// Releases memory obtained from libvram::malloc(); nullptr is ignored.
/// @throws std::runtime_error if the global wrapper has not been set up
///         (ctor_libvram() has not run), plus anything the wrapper throws.
void libvram::free(void *ptr) {
  // Same contract as ::free: releasing a null pointer does nothing.
  if (ptr == nullptr) {
    return;
  }
  // Fail loudly instead of dereferencing a null global.
  if (vrw_obj == nullptr) {
    throw std::runtime_error("libvram is not initialized");
  }
  vrw_obj->free(ptr);
}



Related Topics



Leave a reply



Submit