Graphics card memory usage in linux
If you just need to know it for 3D graphics development purposes, you may want to look into something like gDEBugger or, if you only care about NVIDIA cards, you can try NVIDIA PerfHUD. I have not used them myself, but I would expect them to track such information.
How to tell the graphic card memory usage?
How about the OpenGL debugger?
Measuring peak nvidia GPU memory usage on linux
Not sure, but you can try something like this:
# Poll nvidia-smi twice a second and print a line each time the observed peak
# GPU memory usage (MiB) increases. Runs until interrupted (Ctrl-C).
peak=0
while true; do
    # noheader,nounits prints one bare number per GPU. Reduce to the largest so
    # the numeric test below also works on multi-GPU machines; "+ 0" coerces
    # empty or non-numeric output (e.g. a failed query) to 0.
    used=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits |
        awk '$1 + 0 > max { max = $1 + 0 } END { print max + 0 }')
    if [ "$used" -gt "$peak" ]; then
        peak=$used
        echo "$peak"
    fi
    sleep .5
done
Reading raw GPU memory from userspace application
The solution is to use the Vulkan API to allocate a heap on the GPU and access it. However, since x86 cannot cache MMIO addresses, every access goes to the GPU over PCIe.
The implementation has about the same latency as Nvidia's server solution.
Here is a quick-and-dirty implementation in C++ that abstracts the GPU as heap memory and allows malloc() and free() on it.
To find out the heap types, check: http://vulkan.gpuinfo.org/displayreport.php?id=14928#memory
You will need that report to check which memory property flags your GPU supports before calling findMemoryType() from createVertexBuffer().
#include <algorithm>
#include <array>
#include <cassert>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <limits>
#include <optional>
#include <set>
#include <stdexcept>
#include <string>
#include <vector>

#include <vulkan/vulkan.h>

#include "libvram/libvram.hh"
// Forward declaration so the global wrapper pointer can be declared before
// the class itself is defined.
class VRamWrapper;

// Process-wide wrapper instance; assigned by ctor_libvram().
VRamWrapper *vrw_obj = nullptr;

// Device extensions that a GPU must offer and that are enabled on the
// logical device.
const char *deviceExtensions[] = {VK_KHR_SWAPCHAIN_EXTENSION_NAME};

// Number of entries in deviceExtensions.
const size_t DEV_EXT_LEN =
    sizeof(deviceExtensions) / sizeof(deviceExtensions[0]);
// Indices of the queue families this program needs from a physical device.
struct QueueFamilyIndices {
  // Index of a queue family with graphics support; empty until one is found.
  std::optional<uint32_t> graphicsFamily;

  // Returns true once every required queue family index has been filled in.
  // Declared const so it can be called on const objects and references.
  bool isComplete() const { return graphicsFamily.has_value(); }
};
class VRamWrapper {
public:
void init() { initVulkan(); }
void *malloc(size_t bytes) { return this->createVertexBuffer(bytes); }
void free(void *buf) { assert(0); }
private:
VkInstance instance;
VkPhysicalDevice physicalDevice = VK_NULL_HANDLE;
VkDevice device;
VkQueue graphicsQueue;
std::vector<VkBuffer> buffers;
std::vector<VkDeviceMemory> bufferMemories;
void initVulkan() {
createInstance();
pickPhysicalDevice();
createLogicalDevice();
}
void cleanup() {
for (auto buf : buffers) {
vkDestroyBuffer(device, buf, nullptr);
}
for (auto mem : bufferMemories) {
vkFreeMemory(device, mem, nullptr);
}
vkDestroyDevice(device, nullptr);
vkDestroyInstance(instance, nullptr);
}
void createInstance() {
VkApplicationInfo appInfo{};
appInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
appInfo.pApplicationName = "Hello Triangle";
appInfo.applicationVersion = VK_MAKE_VERSION(1, 0, 0);
appInfo.pEngineName = "No Engine";
appInfo.engineVersion = VK_MAKE_VERSION(1, 0, 0);
appInfo.apiVersion = VK_API_VERSION_1_0;
VkInstanceCreateInfo createInfo{};
createInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
createInfo.pApplicationInfo = &appInfo;
createInfo.enabledLayerCount = 0;
createInfo.pNext = nullptr;
if (vkCreateInstance(&createInfo, nullptr, &instance) != VK_SUCCESS) {
throw std::runtime_error("failed to create instance!");
}
}
void pickPhysicalDevice() {
uint32_t deviceCount = 0;
vkEnumeratePhysicalDevices(instance, &deviceCount, nullptr);
if (deviceCount == 0) {
throw std::runtime_error("failed to find GPUs with Vulkan support!");
}
std::vector<VkPhysicalDevice> devices(deviceCount);
vkEnumeratePhysicalDevices(instance, &deviceCount, devices.data());
for (const auto &device : devices) {
if (isDeviceSuitable(device)) {
physicalDevice = device;
break;
}
}
if (physicalDevice == VK_NULL_HANDLE) {
throw std::runtime_error("failed to find a suitable GPU!");
}
}
void createLogicalDevice() {
QueueFamilyIndices indices = findQueueFamilies(physicalDevice);
std::vector<VkDeviceQueueCreateInfo> queueCreateInfos;
std::set<uint32_t> uniqueQueueFamilies = {indices.graphicsFamily.value()};
float queuePriority = 1.0f;
for (uint32_t queueFamily : uniqueQueueFamilies) {
VkDeviceQueueCreateInfo queueCreateInfo{};
queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
queueCreateInfo.queueFamilyIndex = queueFamily;
queueCreateInfo.queueCount = 1;
queueCreateInfo.pQueuePriorities = &queuePriority;
queueCreateInfos.push_back(queueCreateInfo);
}
VkPhysicalDeviceFeatures deviceFeatures{};
VkDeviceCreateInfo createInfo{};
createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
createInfo.queueCreateInfoCount =
static_cast<uint32_t>(queueCreateInfos.size());
createInfo.pQueueCreateInfos = queueCreateInfos.data();
createInfo.pEnabledFeatures = &deviceFeatures;
createInfo.enabledExtensionCount = static_cast<uint32_t>(DEV_EXT_LEN);
createInfo.ppEnabledExtensionNames = deviceExtensions;
createInfo.enabledLayerCount = 0;
if (vkCreateDevice(physicalDevice, &createInfo, nullptr, &device) !=
VK_SUCCESS) {
throw std::runtime_error("failed to create logical device!");
}
vkGetDeviceQueue(device, indices.graphicsFamily.value(), 0, &graphicsQueue);
}
void *createVertexBuffer(size_t bytes) {
VkBuffer buffer;
VkDeviceMemory bufferMemory;
VkBufferCreateInfo bufferInfo{};
bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
bufferInfo.size = bytes;
bufferInfo.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
bufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
if (vkCreateBuffer(device, &bufferInfo, nullptr, &buffer) != VK_SUCCESS) {
throw std::runtime_error("failed to create vertex buffer!");
}
VkMemoryRequirements memRequirements;
vkGetBufferMemoryRequirements(device, buffer, &memRequirements);
assert(memRequirements.size == bytes);
VkMemoryAllocateInfo allocInfo{};
allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
allocInfo.allocationSize = memRequirements.size;
allocInfo.memoryTypeIndex =
findMemoryType(memRequirements.memoryTypeBits,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
if (auto res = vkAllocateMemory(device, &allocInfo, nullptr, &bufferMemory);
res != VK_SUCCESS) {
throw std::runtime_error("failed to allocate vertex buffer memory");
}
vkBindBufferMemory(device, buffer, bufferMemory, 0);
void *data;
auto res = vkMapMemory(device, bufferMemory, 0, bytes, 0, &data);
if (res != VK_SUCCESS) {
throw std::runtime_error("Map failed");
}
fprintf(stderr, "Map completed. Allocated %lu MiB at %p\n",
(bytes) / (1024UL * 1024), data);
this->buffers.push_back(buffer);
this->bufferMemories.push_back(bufferMemory);
return data;
}
uint32_t findMemoryType(uint32_t typeFilter,
VkMemoryPropertyFlags properties) {
VkPhysicalDeviceMemoryProperties memProperties;
vkGetPhysicalDeviceMemoryProperties(physicalDevice, &memProperties);
for (uint32_t i = 0; i < memProperties.memoryTypeCount; i++) {
if ((typeFilter & (1 << i)) &&
(memProperties.memoryTypes[i].propertyFlags & properties) ==
properties) {
return i;
}
}
throw std::runtime_error("failed to find suitable memory type!");
}
bool isDeviceSuitable(VkPhysicalDevice device) {
QueueFamilyIndices indices = findQueueFamilies(device);
bool extensionsSupported = checkDeviceExtensionSupport(device);
return indices.isComplete() &&
extensionsSupported /* && swapChainAdequate */;
}
bool checkDeviceExtensionSupport(VkPhysicalDevice device) {
uint32_t extensionCount;
vkEnumerateDeviceExtensionProperties(device, nullptr, &extensionCount,
nullptr);
std::vector<VkExtensionProperties> availableExtensions(extensionCount);
vkEnumerateDeviceExtensionProperties(device, nullptr, &extensionCount,
availableExtensions.data());
std::set<std::string> requiredExtensions(deviceExtensions,
deviceExtensions + DEV_EXT_LEN);
for (const auto &extension : availableExtensions) {
requiredExtensions.erase(extension.extensionName);
}
return requiredExtensions.empty();
}
QueueFamilyIndices findQueueFamilies(VkPhysicalDevice device) {
QueueFamilyIndices indices;
uint32_t queueFamilyCount = 0;
vkGetPhysicalDeviceQueueFamilyProperties(device, &queueFamilyCount,
nullptr);
std::vector<VkQueueFamilyProperties> queueFamilies(queueFamilyCount);
vkGetPhysicalDeviceQueueFamilyProperties(device, &queueFamilyCount,
queueFamilies.data());
int i = 0;
for (const auto &queueFamily : queueFamilies) {
if (queueFamily.queueFlags & VK_QUEUE_GRAPHICS_BIT) {
indices.graphicsFamily = i;
}
if (indices.isComplete()) {
break;
}
i++;
}
return indices;
}
};
void ctor_libvram() {
fprintf(stderr, "%s() called\n", __FUNCTION__);
vrw_obj = new VRamWrapper();
vrw_obj->init();
}
// Public entry point: forwards the request to the global wrapper (vrw_obj)
// and returns the pointer it hands back.
void *libvram::malloc(size_t bytes) {
  void *const mapped = vrw_obj->malloc(bytes);
  return mapped;
}
void libvram::free(void *ptr) {
vrw_obj->free(ptr);
}
Related Topics
Which Is Faster of Two Case or If
Why Is My Core File Not Overwritten
Echo 'The Character - (Dash) in The Unix Command Line
How to Write on Serial Port Using Qextserialport
Data Pointers in Inode Data Structure
How to Change The Desktop Wallpaper on Linux from Within a Shell/Bash Script
Use "Git Revert" to Back-Out a Change Adding a Line
How to Get Window Id for Xdotool Automatically
Where to Store The Private Key on a Digital Signing Server
How to Correctly Nandwrite a Nanddump'Ed Dump with Oob
Ssh-Add from Bash Script and Automate Passphrase Entry
Sending Mail in Bash Script Outputs Literal \N Instead of a New Line
Qemu Simple Backend Tracing Dosen'T Print Anything
Visual Studio 2017 Could Not Create Directories, Mkdir Exit Code: 1