Execute RDMSR and WRMSR instructions from C/C++ code
Chances are, you are running this code on an x86 processor in Ring 3, where you do not have the privileges to execute this instruction. Period. This is a hardware limitation. The only way to execute the instruction is to run in Ring 0, and chances are your OS won't let you do that. You will need to write a kernel-mode driver to accomplish this.
Edit: http://faydoc.tripod.com/cpu/rdmsr.htm has more info.
Cannot read back from MSR
The problem is that you don't fully tell gcc which registers your inline assembly uses and how, and you also expect gcc not to do anything funky to those registers between the separate fragments of your inline assembly code. Related mov and xxmsr instructions should be in the same asm block.
Look what gcc does with your code (I've altered it a tiny bit to make it compilable as a regular program)...
Source:
// file: msr.c
#include <stdio.h>
typedef unsigned uint32_t;
#define printk printf
#define __init
// Broken example, kept as-is for illustration: it tries to write hi:lo
// (0x00000000:0x0000000b) to MSR 0x38d with wrmsr and then read it back with
// rdmsr, but the register setup is split across separate asm statements that
// declare no outputs or clobbers, so the compiler does not know EAX/EDX/ECX
// must survive from one statement to the next.
// Prints hi/lo after the write and again after the read; always returns 0.
static int __init test3_init(void)
{
uint32_t hi,lo;
hi=0; lo=0xb;
// Each mov is its own asm statement with only an "r" input: the compiler may
// pick any register for %0 and is never told that EAX or EDX gets overwritten.
asm volatile("mov %0,%%eax"::"r"(lo));
asm volatile("mov %0,%%edx"::"r"(hi));
// Basic asm (no operands): the MSR index is placed in ECX behind the compiler's back.
asm volatile("mov $0x38d,%ecx");
// wrmsr relies on EAX/EDX/ECX still holding the values set by the statements above.
asm volatile("wrmsr");
printk("exit_write: hi=%08x lo=%08x\n",hi,lo);
// ECX is reloaded after the printk call, again in a separate statement from its user.
asm volatile("mov $0x38d,%ecx");
// Only the outputs are declared here ("=a" -> lo, "=d" -> hi); ECX is not listed as an input.
asm volatile("rdmsr":"=a"(lo),"=d"(hi));
printk("exit_write2: hi=%08x lo=%08x\n",hi,lo);
return 0;
}
// Program entry point: runs the MSR test routine and uses its result as the
// process exit status.
int main(void)
{
    int status = test3_init();

    return status;
}
Compiling (with MinGW gcc 4.6.2):
gcc msr.c -c -S -o msr.s
Disassembly of test3_init()
from msr.s:
_test3_init:
pushl %ebp
movl %esp, %ebp
pushl %esi
pushl %ebx
subl $32, %esp
movl $0, -12(%ebp)
movl $11, -16(%ebp)
movl -16(%ebp), %eax
mov %eax,%eax
movl -12(%ebp), %eax
mov %eax,%edx
mov $0x38d,%ecx
wrmsr
movl -16(%ebp), %eax
movl %eax, 8(%esp)
movl -12(%ebp), %eax
movl %eax, 4(%esp)
movl $LC0, (%esp)
call _printf
mov $0x38d,%ecx
rdmsr
movl %edx, %ebx
movl %eax, %esi
movl %esi, -16(%ebp)
movl %ebx, -12(%ebp)
movl -16(%ebp), %eax
movl %eax, 8(%esp)
movl -12(%ebp), %eax
movl %eax, 4(%esp)
movl $LC1, (%esp)
call _printf
movl $0, %eax
addl $32, %esp
popl %ebx
popl %esi
popl %ebp
ret
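Note that when the CPU starts executing wrmsr it has ecx=0x38d (OK), edx=0 (OK), eax=0 (not 0xb, oops!). gcc chose eax as the scratch register for both mov fragments, so loading hi for the second mov overwrote the lo value that the first mov had just placed in eax. Follow the instructions to see it.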
What you can and should write instead is something like the following, even shorter than it was:
// Corrected example: each MSR access is a single asm statement whose operand
// constraints pin the registers ("c" = ECX, "a" = EAX, "d" = EDX), so the
// compiler itself loads them immediately before the instruction.
// Prints hi/lo after the write and again after the read; always returns 0.
static int __init test3_init2(void)
{
uint32_t hi,lo;
hi=0; lo=0xb;
// wrmsr: three inputs, no outputs -- ECX = MSR index 0x38d, EAX = lo, EDX = hi.
asm volatile("wrmsr"::"c"(0x38d),"a"(lo),"d"(hi));
printk("exit_write: hi=%08x lo=%08x\n",hi,lo);
// rdmsr: ECX = MSR index as input; the result is received in EAX (lo) and EDX (hi).
asm volatile("rdmsr":"=a"(lo),"=d"(hi):"c"(0x38d));
printk("exit_write2: hi=%08x lo=%08x\n",hi,lo);
return 0;
}
Now, the disassembly of test3_init2():
_test3_init2:
pushl %ebp
movl %esp, %ebp
pushl %esi
pushl %ebx
subl $48, %esp
movl $0, -12(%ebp)
movl $11, -16(%ebp)
movl $909, %ecx
movl -16(%ebp), %eax
movl -12(%ebp), %edx
wrmsr
movl -16(%ebp), %eax
movl %eax, 8(%esp)
movl -12(%ebp), %eax
movl %eax, 4(%esp)
movl $LC0, (%esp)
call _printf
movl $909, -28(%ebp)
movl -28(%ebp), %ecx
rdmsr
movl %edx, %ebx
movl %eax, %esi
movl %esi, -16(%ebp)
movl %ebx, -12(%ebp)
movl -16(%ebp), %eax
movl %eax, 8(%esp)
movl -12(%ebp), %eax
movl %eax, 4(%esp)
movl $LC1, (%esp)
call _printf
movl $0, %eax
addl $48, %esp
popl %ebx
popl %esi
popl %ebp
ret
Also, remember that every CPU has its own set of MSRs, and you may want to set this MSR on all of them. Another important consideration is that the thread in which you're manipulating an MSR should not be moved between CPUs until you're done with the MSR.
How to get the CPU cycle count in x86_64 from C++?
Starting with GCC 4.5, the __rdtsc() intrinsic is supported by both MSVC and GCC.
But the include that's needed is different:
#ifdef _WIN32
#include <intrin.h>
#else
#include <x86intrin.h>
#endif
Here's the original answer before GCC 4.5.
Pulled directly out of one of my projects:
#include <stdint.h>
// Windows
#ifdef _WIN32
#include <intrin.h>
// Returns the value produced by the __rdtsc() intrinsic from <intrin.h>.
uint64_t rdtsc(){
return __rdtsc();
}
// Linux/GCC
#else
// Returns the 64-bit result of the rdtsc instruction, read via GNU C extended asm.
uint64_t rdtsc(){
unsigned int lo,hi;
// "=a"/"=d" bind the outputs to EAX/EDX, where rdtsc leaves the low/high halves;
// volatile keeps the compiler from reusing an earlier result.
__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
// Widen hi to 64 bits before shifting so the two 32-bit halves combine into one value.
return ((uint64_t)hi << 32) | lo;
}
#endif
This GNU C Extended asm tells the compiler:
- volatile: the outputs aren't a pure function of the inputs (so it has to re-run every time, not reuse an old result).
- "=a"(lo) and "=d"(hi): the output operands are fixed registers: EAX and EDX (x86 machine constraints). The x86 rdtsc instruction puts its 64-bit result in EDX:EAX, so letting the compiler pick an output with "=r" wouldn't work: there's no way to ask the CPU for the result to go anywhere else.
- ((uint64_t)hi << 32) | lo: zero-extend both 32-bit halves to 64-bit (because lo and hi are unsigned), and logically shift + OR them together into a single 64-bit C variable. In 32-bit code, this is just a reinterpretation; the values still just stay in a pair of 32-bit registers. In 64-bit code you typically get actual shift + OR asm instructions, unless the high half optimizes away.
(editor's note: this could probably be more efficient if you used unsigned long
instead of unsigned int
. Then the compiler would know that lo
was already zero-extended into RAX. It wouldn't know that the upper half was zero, so |
and +
are equivalent if it wanted to merge a different way. The intrinsic should in theory give you the best of both worlds as far as letting the optimizer do a good job.)
See https://gcc.gnu.org/wiki/DontUseInlineAsm: don't use inline asm if you can avoid it. But hopefully this section is useful if you need to understand old code that uses inline asm so you can rewrite it with intrinsics. See also https://stackoverflow.com/tags/inline-assembly/info
Using GDB to read MSRs
If you prefer not to change your code (or if the code is not available), you could do something similar to amdn's answer in the following way. The call to arch_prctl requires a pointer to a uint64_t, for which I use the address of an unused portion of the stack (8 bytes below the current stack pointer). After the call returns, read the 8-byte value stored at that location.
Constants used: ARCH_GET_FS = 0x1003, ARCH_GET_GS = 0x1004
(gdb) p $rsp
$1 = (void *)0x7fffffffe6f0
(gdb) call arch_prctl(0x1003, $rsp - 0x8)
$2 = 0
(gdb) x /gx $rsp - 0x8
0x7fffffffe6e8: 0x00007ffff7fe0700 => IA32_FS_BASE
(gdb) call arch_prctl(0x1004, $rsp - 0x8)
$3 = 0
(gdb) x /gx $rsp - 0x8
0x7fffffffe6e8: 0x0000000000000000 => IA32_GS_BASE
Related Topics
How to Use Pre-Compiled Headers in Vc++ Without Requiring Stdafx.H
Error: Declaration Does Not Declare Anything
How to Properly Setup Googletest on Os X Aside from Xcode
Can Someone Explain About Linux Library Naming
Calculate Md5 of a String in C++
After Sending a Lot, My Send() Call Causes My Program to Stall Completely. How Is This Possible
How to Perform Rgb->Yuv Conversion in C/C++
C++ 'Strcpy' Gives a Warning (C4996)
Warning: Narrowing Conversion C++11
Is Rdtsc Timer Inaccurate in Linux
Can a C Compiler Rearrange Stack Variables
C++: What Is the Printf() Format Spec for "Float"
Building Qt 4.5 with Visual C++ 2010
Capturing H264 Stream with Opencv
Are End+1 Iterators for Std::String Allowed
How Is Push_Back Implemented in Stl Vector
What Happens to an Stl Iterator After Erasing It in VS, Unix/Linux