Sending Realtime Signal from a Kernel Module to User Space Fails

Is sending a real-time signal from the kernel with SI_QUEUE required in order to deliver `struct siginfo.si_int`?

This is not an answer, but an extended comment, since experimenting will sometimes yield insights. Technically, this is just an opinion, but with a detailed basis on that opinion. So, "comment" fits it best.

Here is a simple program that catches SIGUSR1, SIGUSR2, and all POSIX realtime signals (SIGRTMIN+0 to SIGRTMAX-0, inclusive); catcher.c:

#define _POSIX_C_SOURCE  200809L
#include <stdlib.h>
#include <unistd.h>
#include <signal.h>
#include <time.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>

/*
 * Return a printable name for signal number signum.
 *
 * The five signals this program treats specially map to string literals.
 * Realtime signals are rendered as "SIGRTMIN+<offset>", and anything else
 * as "[<number>]".  The last two forms are formatted into a static buffer,
 * so the returned pointer is only valid until the next call.
 */
static const char *signal_name(const int signum)
{
    static const struct {
        int         number;
        const char *label;
    } fixed_names[] = {
        { SIGINT,  "SIGINT"  },
        { SIGHUP,  "SIGHUP"  },
        { SIGTERM, "SIGTERM" },
        { SIGUSR1, "SIGUSR1" },
        { SIGUSR2, "SIGUSR2" },
    };
    static char name_buffer[16];
    size_t      k;

    for (k = 0; k < sizeof fixed_names / sizeof fixed_names[0]; k++) {
        if (fixed_names[k].number == signum)
            return fixed_names[k].label;
    }

    if (signum < SIGRTMIN || signum > SIGRTMAX)
        snprintf(name_buffer, sizeof name_buffer, "[%d]", signum);
    else
        snprintf(name_buffer, sizeof name_buffer, "SIGRTMIN+%d", signum-SIGRTMIN);

    return name_buffer;
}

/*
 * catcher: block SIGINT, SIGHUP, SIGTERM, SIGUSR1, SIGUSR2 and every POSIX
 * realtime signal, then wait for them synchronously with sigwaitinfo() and
 * dump the siginfo_t that came with each one.
 *
 * Every printable siginfo_t member is dumped regardless of si_code.  Most
 * members overlay each other, so for any given signal only a subset of the
 * printed values is meaningful; the rest are type-punned views of the same
 * bytes.
 *
 * Returns EXIT_SUCCESS after SIGINT, SIGHUP or SIGTERM, and EXIT_FAILURE if
 * the signals cannot be blocked or sigwaitinfo() fails.
 */
int main(void)
{
    const int pid = (int)getpid();
    /*
     * si_utime and si_stime are reported in clock ticks, i.e. in units of
     * sysconf(_SC_CLK_TCK) per second, not in CLOCKS_PER_SEC units.  Fall
     * back to CLOCKS_PER_SEC only if the tick rate cannot be queried.
     */
    const long tck = sysconf(_SC_CLK_TCK);
    const double ticks_per_sec = (tck > 0) ? (double)tck : (double)CLOCKS_PER_SEC;
    siginfo_t info;
    sigset_t mask;
    int signum;

    sigemptyset(&mask);

    /* INT, HUP, and TERM for termination. */
    sigaddset(&mask, SIGINT);
    sigaddset(&mask, SIGHUP);
    sigaddset(&mask, SIGTERM);

    /* USR1 and USR2 signals, for comparison to realtime signals. */
    sigaddset(&mask, SIGUSR1);
    sigaddset(&mask, SIGUSR2);

    /* Realtime signals. */
    for (signum = SIGRTMIN; signum <= SIGRTMAX; signum++)
        sigaddset(&mask, signum);

    /* The signals must be blocked for sigwaitinfo() to collect them. */
    if (sigprocmask(SIG_BLOCK, &mask, NULL) == -1) {
        fprintf(stderr, "Cannot block signals: %s.\n", strerror(errno));
        return EXIT_FAILURE;
    }

    printf("Process %d is waiting for realtime signals (%d to %d, inclusive).\n", pid, SIGRTMIN, SIGRTMAX);
    /* &&callsite is the GNU C "address of a label" extension. */
    printf(" (sigwaitinfo() is at %p, and is called from %p.)\n", (void *)sigwaitinfo, (void *)&&callsite);
    fflush(stdout);

    while (1) {
        /* Clear the signal info structure, so that we can detect nonzero data reliably. */
        memset(&info, 0, sizeof info);

callsite:
        signum = sigwaitinfo(&mask, &info);
        if (signum == -1) {
            /*
             * An interrupted wait (for example after the process was
             * stopped and continued) is not an error; just wait again.
             */
            if (errno == EINTR)
                continue;
            fprintf(stderr, "%d: sigwaitinfo() failed: %s.\n", pid, strerror(errno));
            return EXIT_FAILURE;
        }

        if (signum == SIGINT || signum == SIGTERM || signum == SIGHUP) {
            fprintf(stderr, "%d: Received %s. Exiting.\n", pid, signal_name(signum));
            return EXIT_SUCCESS;
        }

        printf("%d: Received %s:\n", pid, signal_name(signum));

        printf(" si_signo: %d\n", info.si_signo);
        printf(" si_errno: %d\n", info.si_errno);
        printf(" si_code: %d\n", info.si_code);
        printf(" si_pid: %d\n", (int)info.si_pid);
        printf(" si_uid: %d\n", (int)info.si_uid);
        printf(" si_status: %d\n", info.si_status);
        printf(" si_utime: %.3f\n", (double)info.si_utime / ticks_per_sec);
        printf(" si_stime: %.3f\n", (double)info.si_stime / ticks_per_sec);
        printf(" si_value.sival_int: %d\n", info.si_value.sival_int);
        printf(" si_value.sival_ptr: %p\n", info.si_value.sival_ptr);
        printf(" si_int: %d\n", info.si_int);
        printf(" si_ptr: %p\n", info.si_ptr);
        printf(" si_overrun: %d\n", info.si_overrun);
        printf(" si_timerid: %d\n", info.si_timerid);
        printf(" si_addr: %p\n", info.si_addr);
        printf(" si_band: %ld (0x%lx)\n", info.si_band, (unsigned long)(info.si_band));
        printf(" si_fd: %d\n", info.si_fd);
        printf(" si_addr_lsb: %d\n", (int)info.si_addr_lsb);
        printf(" si_lower: %p\n", info.si_lower);
        printf(" si_upper: %p\n", info.si_upper);

        /* Make each report visible immediately, even when stdout is a pipe or file. */
        fflush(stdout);
    }
}

Compile it using e.g. gcc -Wall -Wextra -O2 catcher.c -o catcher, and run it in a terminal window (./catcher). (It takes no command-line parameters.)

It tells you its process ID, and runs until you press Ctrl+C, or send it an INT, HUP, or TERM signal.

For the sake of the example, I'll assume it is running as process 12345 later on.

To queue signals to another userspace process, we need a second program, queue.c:

#define _POSIX_C_SOURCE  200809L
#include <stdlib.h>
#include <unistd.h>
#include <signal.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include <errno.h>

/*
 * Report whether only whitespace remains in the string s.
 *
 * Returns 1 if s consists of zero or more whitespace characters followed
 * by the terminating NUL, and 0 otherwise.  A NULL pointer is never
 * considered to be at the end of a string, so it yields 0.
 */
static inline int at_end(const char *s)
{
    const unsigned char *cursor = (const unsigned char *)s;

    if (cursor == NULL)
        return 0;

    /* Step over any run of whitespace. */
    for (; isspace(*cursor); cursor++)
        ;

    return (*cursor == '\0') ? 1 : 0;
}

/*
 * Parse a nonzero process ID from the string src.
 *
 * The number may be decimal, octal (leading 0) or hexadecimal (leading 0x),
 * and may be followed by whitespace only.  The value must be nonzero and
 * must survive a round trip through pid_t unchanged.
 *
 * On success the value is stored in *to (when to is non-NULL) and 0 is
 * returned; on any failure -1 is returned and *to is left untouched.
 */
static int parse_pid(const char *src, pid_t *to)
{
    char *tail;
    long  parsed;
    pid_t candidate;

    if (src == NULL || at_end(src))
        return -1;

    tail = (char *)src;
    errno = 0;
    parsed = strtol(src, &tail, 0);

    /* Conversion error or out-of-range value. */
    if (errno != 0)
        return -1;

    /* Trailing garbage (or nothing converted at all). */
    if (!at_end(tail))
        return -1;

    /* Zero is not accepted as a process ID here. */
    if (parsed == 0)
        return -1;

    /* Reject values that do not fit into pid_t. */
    candidate = (pid_t)parsed;
    if ((long)candidate != parsed)
        return -1;

    if (to != NULL)
        *to = candidate;
    return 0;
}

/*
 * Parse a signal name or number from the string src.
 *
 * Accepted forms, after optional leading whitespace and an optional "SIG"
 * prefix:
 *     USR1, USR2, RTMIN, RTMAX
 *     RTMIN+N, RTMAX-N   (0 <= N <= SIGRTMAX - SIGRTMIN)
 *     N                  (1 <= N <= SIGRTMAX)
 * Numeric forms may be followed by whitespace only.
 *
 * On success the signal number is stored in *to (when to is non-NULL) and
 * 0 is returned; otherwise -1 is returned and *to is left untouched.
 */
static int parse_signum(const char *src, int *to)
{
    const unsigned int rtmax = SIGRTMAX - SIGRTMIN;
    int signum;
    unsigned int u;
    char dummy;

    if (!src || !*src)
        return -1;

    /* Skip leading whitespace. */
    while (isspace((unsigned char)(*src)))
        src++;

    /* Skip optional SIG prefix. */
    if (!strncmp(src, "SIG", 3))
        src += 3;

    /*
     * In the sscanf() patterns the trailing " %c" only converts if a
     * non-whitespace character follows the number, so a return value of
     * exactly 1 means "a number, then nothing but whitespace".
     */
    if (!strcmp(src, "USR1")) {
        signum = SIGUSR1;
    } else if (!strcmp(src, "USR2")) {
        signum = SIGUSR2;
    } else if (!strcmp(src, "RTMIN")) {
        signum = SIGRTMIN;
    } else if (!strcmp(src, "RTMAX")) {
        signum = SIGRTMAX;
    } else if (sscanf(src, "RTMIN+%u %c", &u, &dummy) == 1 && u <= rtmax) {
        signum = SIGRTMIN + (int)u;
    } else if (sscanf(src, "RTMAX-%u %c", &u, &dummy) == 1 && u <= rtmax) {
        signum = SIGRTMAX - (int)u;
    } else if (sscanf(src, "%u %c", &u, &dummy) == 1 && u > 0 && u <= (unsigned int)SIGRTMAX) {
        /*
         * The upper bound is checked in unsigned arithmetic on purpose:
         * %u accepts a leading minus sign and wraps the value, so a
         * signed comparison would let inputs such as "-5" through.
         */
        signum = (int)u;
    } else {
        return -1;
    }

    if (to)
        *to = signum;
    return 0;
}

/*
 * Parse the payload value for a queued signal from the string src.
 *
 * The value is an integer in decimal, octal (leading 0) or hexadecimal
 * (leading 0x) notation, optionally preceded by a unary '!' (logical not)
 * or '~' (bitwise not), and optionally followed by whitespace.  A leading
 * minus sign is accepted; strtoul() negates in unsigned arithmetic, which
 * produces the same bit pattern a signed conversion would.
 *
 * On success the value is stored in to->sival_ptr (when to is non-NULL)
 * and 0 is returned; otherwise -1 is returned and *to is left untouched.
 */
static int parse_sigval(const char *src, union sigval *to)
{
    unsigned long u; /* In Linux, sizeof (unsigned long) == sizeof (void *). */
    int op = 0;
    char *end = NULL;

    /* Skip leading whitespace. */
    if (src)
        while (isspace((unsigned char)(*src)))
            src++;

    /* Nothing to parse? */
    if (!src || !*src)
        return -1;

    /* ! or ~ unary operator? */
    if (*src == '!' || *src == '~')
        op = *(src++);

    errno = 0;
    u = strtoul(src, &end, 0);

    /*
     * Reject range errors, input with no digits at all (end == src, e.g.
     * a lone "!" or "~"), and trailing garbage.  No separate strtol()
     * attempt is needed: any input strtoul() rejects here would be
     * rejected by strtol() for the same reason.
     */
    if (errno || end == src || !at_end(end))
        return -1;

    if (op == '!')
        u = !u;
    else if (op == '~')
        u = ~u;

    if (to)
        to->sival_ptr = (void *)u;
    return 0;
}

/*
 * queue: send one signal with an attached value to a single process using
 * sigqueue().
 *
 * Usage: queue PID SIGNAL VALUE
 *
 * sigqueue() addresses exactly one process; unlike kill() it has no
 * process-group form.  Non-positive PIDs are therefore rejected up front
 * instead of being passed on to fail inside sigqueue().
 *
 * Returns EXIT_SUCCESS if the signal was queued (or if usage was requested
 * with at most one argument), EXIT_FAILURE otherwise.
 */
int main(int argc, char *argv[])
{
    const int pid = (int)getpid();
    pid_t target = 0;
    int signum = -1;
    union sigval value;

    if (argc != 4 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
        const char *argv0 = (argc > 0 && argv && argv[0] && argv[0][0]) ? argv[0] : "(this)";
        fprintf(stderr, "\n");
        fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv0);
        fprintf(stderr, " %s PID SIGNAL VALUE\n", argv0);
        fprintf(stderr, "\n");
        fprintf(stderr, "Queues signal SIGNAL to process PID, with value VALUE.\n");
        fprintf(stderr, "PID must be a positive process ID; sigqueue() cannot target process groups.\n");
        fprintf(stderr, "\n");
        return (argc <= 2) ? EXIT_SUCCESS : EXIT_FAILURE;
    }

    if (parse_pid(argv[1], &target) || target <= 0) {
        fprintf(stderr, "%s: Invalid process ID.\n", argv[1]);
        return EXIT_FAILURE;
    }

    if (parse_signum(argv[2], &signum)) {
        fprintf(stderr, "%s: Invalid signal name or number.\n", argv[2]);
        return EXIT_FAILURE;
    }

    if (parse_sigval(argv[3], &value)) {
        fprintf(stderr, "%s: Invalid value.\n", argv[3]);
        return EXIT_FAILURE;
    }

    /* The label exists only so its address can be printed below (GNU C &&label). */
callsite:
    if (sigqueue(target, signum, value) == -1) {
        fprintf(stderr, "Process %d failed to send signal %d with value %p to process %d: %s.\n", pid, signum, value.sival_ptr, (int)target, strerror(errno));
        return EXIT_FAILURE;
    } else {
        printf("Process %d sent signal %d with value %p to process %d.\n", pid, signum, value.sival_ptr, (int)target);
        printf(" (sigqueue() is at %p, calling sigqueue() at %p.)\n", (void *)sigqueue, (void *)(&&callsite));
        return EXIT_SUCCESS;
    }
}

Compile it too using e.g. gcc -Wall -Wextra -O2 queue.c -o queue. It takes three command-line parameters; run it without parameters (or with just -h or --help) to see its usage.

If the catcher is running as process 12345, we can run e.g. ./queue 12345 SIGRTMIN+5 0xcafedeadbeefbabe to queue a signal to the catcher, and see the output.

If the queue process happens to be 54321, we can expect the following output on x86-64 architecture:

    si_signo:    39
si_errno: 0
si_code: -1
si_pid: 54321
si_uid: 1001
si_status: -1091585346
si_utime: 0.000
si_stime: 0.000
si_value.sival_int: -1091585346
si_value.sival_ptr: 0xcafedeadbeefbabe
si_int: -1091585346
si_ptr: 0xcafedeadbeefbabe
si_overrun: 1001
si_timerid: 54321
si_addr: 0x3e90000d431
si_band: 4299262317617 (0x3e90000d431)
si_fd: -1091585346
si_addr_lsb: -17730
si_lower: (nil)
si_upper: (nil)

(Other hardware architectures may vary slightly due to byte order and long/pointer size differences.)

Of these fields, only si_signo == SIGRTMIN+5, si_errno == 0, and si_code == -1 == SI_QUEUE are defined for all signals.

The rest of the fields are actually in various unions, which means that the subset of fields we can access, depends on the si_code field (as per man 2 sigaction).

When si_code == SI_QUEUE, we have si_pid (the pid of the process that did sigqueue(), or 0 if it is from the kernel), si_int == si_value.sival_int, and si_ptr == si_value.sival_ptr. The rest of the fields are essentially in a union with those, so by accessing them, we're just type-punning the contents, getting garbage.

When si_code == SI_KERNEL, the userspace does not know which of the unions was populated. That is, we don't know if the si_pid and si_int or si_ptr are valid, or whether the kernel intended us to examine si_addr (similar to SIGBUS) or some other fields.

This means that for the userspace to understand correctly a signal sent by the kernel that contains pertinent data in si_int or si_ptr, the logical and least-surprise option is to have si_code == SI_QUEUE and si_pid == 0.

(Indeed, I do recall seeing this in real life, but cannot remember for the life of me where. If I did, I could have made this an answer, but because I don't, this has to stay as an extended comment; report of observed behaviour only.)

Finally, if we look at the userspace API for Linux kernel 5.9.9, we can see the definition of siginfo_t in include/uapi/asm-generic/siginfo.h. Remember, this is not how C libraries expose the information; this is how the Linux kernel delivers the information to userspace. Combining the definitions for readability, and ignoring certain arch differences (like member order), we have essentially

/*
 * Condensed view of the siginfo layout the kernel delivers to userspace.
 *
 * si_signo, si_errno and si_code are always present.  The members of
 * _sifields overlay one another, so only one of them carries meaningful
 * data for a given signal; the three common fields are all userspace has
 * to decide which one that is.  The outer union with _si_pad reserves
 * room for SI_MAX_SIZE/sizeof(int) ints.
 */
typedef struct siginfo {
union {
struct {
int si_signo;
int si_errno;
int si_code;

/* Overlapping payloads; which one applies depends on the signal and si_code. */
union {

/* kill() */
struct {
__kernel_pid_t _pid; /* sender's pid */
__kernel_uid32_t _uid; /* sender's uid */
} _kill;

/* POSIX.1b timers */
struct {
__kernel_timer_t _tid; /* timer id */
int _overrun; /* overrun count */
sigval_t _sigval; /* same position as _rt._sigval */
int _sys_private; /* not to be passed to user */
} _timer;

/* POSIX.1b (queued) signals: the layout behind si_pid, si_uid, si_int and si_ptr */
struct {
__kernel_pid_t _pid; /* sender's pid */
__kernel_uid32_t _uid; /* sender's uid */
sigval_t _sigval; /* payload: sival_int / sival_ptr */
} _rt;

/* SIGCHLD */
struct {
__kernel_pid_t _pid; /* which child */
__kernel_uid32_t _uid; /* sender's uid */
int _status; /* exit code */
__ARCH_SI_CLOCK_T _utime;
__ARCH_SI_CLOCK_T _stime;
} _sigchld;

/* Fault signals: SIGILL, SIGFPE, SIGSEGV, SIGBUS */
struct {
void __user *_addr; /* faulting instruction or memory reference */
int _trapno;
/* Extra detail; which member applies depends on si_code. */
union {
short _addr_lsb; /* LSB of the reported address */
/* used when si_code is SEGV_BNDERR */
struct {
char _dummy_bnd[__ADDR_BND_PKEY_PAD];
void __user *_lower;
void __user *_upper;
} _addr_bnd;
/* used when si_code is SEGV_PKUERR */
struct {
char _dummy_pkey[__ADDR_BND_PKEY_PAD];
__u32 _pkey;
} _addr_pkey;
};
} _sigfault;

/* SIGPOLL */
struct {
__ARCH_SI_BAND_T _band;
int _fd;
} _sigpoll;

/* SIGSYS */
struct {
void __user *_call_addr; /* calling user instruction */
int _syscall; /* triggering system call number */
unsigned int _arch; /* architecture of the system call */
} _sigsys;

} _sifields;
};

/* Sets the minimum size of the whole structure. */
int _si_pad[SI_MAX_SIZE/sizeof(int)];
};
} siginfo_t;

So, essentially, the kernel can provide the fields in only one of the _rt, _kill, _timer, _sigchld, _sigfault, _sigpoll, or _sigsys structures -- because they alias each other -- and the only fields for the userspace to determine which one to access, are the common ones: si_signo, si_errno, and si_code. (Although si_errno really is reserved for errno code.)

Existing userspace code – using the guidance of man 2 sigaction – knows to examine si_ptr/si_int only when si_code == SI_QUEUE. So, it is logical for the kernel to emit such signals with si_pid == 0 and si_code == SI_QUEUE.

The final wrinkle is the C library. For example, the GNU C library uses one or two POSIX realtime signals internally (typically 32 and 33; among other things, to synchronize things like process uid, which are actually per-thread properties in Linux, but per-process properties in POSIX). So, a C library may "consume" odd-looking signals, because it might see them as its own. (Usually not, though, as the signal number is pretty decisive!)

More importantly, the siginfo_t structure used by a particular C library may not be anything like the one used by the Linux kernel (the library just copies the fields as needed from a temporary copy of the structure). So, if one relies on details on how the Linux kernel provides the siginfo_t, instead of how siginfo_t is used in practice, one can be bitten by such translation layer in the C library.

Here, again, the least surprising case for a signal with a si_int/si_ptr payload, from the kernel, would be si_pid == 0 and si_code == SI_QUEUE. There is no sane reason for a C library to consume or drop such signals. And, the only difference between such and normal userspace queued signals is then si_pid being zero (which is not a valid process ID).

At this point, we could claim the answer to the stated question is "well, no, not really; but you want to use SI_QUEUE so the C library and/or the userspace process does not get confused". However, that is not an authoritative answer, just an opinion.

How to catch a signal in a kernel module

In the kernel, you can use signal_pending() or fatal_signal_pending() to check whether a signal (or a fatal signal) has arrived:

/* Keep looping until a fatal signal is pending for the current task. */
while (!fatal_signal_pending(current)) {
    // infinite loop
}

So, you can press Ctrl+C while insmod <your-module.ko> is running, and the module's init function will leave the loop.


A kernel thread (created with kthread_create() or similar) can catch signals only if it enables them with allow_signal().

How can kernel module communicate with user-space process during rmmod?

Such a design does not fit well with how the kernel works.

Instead, you should make the module report itself as in-use until the cleanup has been completed (so causing rmmod to fail). When you want to unload the module, you should trigger the userspace cleanup to happen, then perform the rmmod when it is complete (presumably with some kind of userspace script).

You could implement this by having the userspace daemon hold a file descriptor open to the device provided by the kernel module, closing it once the userspace cleanup has happened.

Communicate Kernel module and user space (Driver)

First of all, you shouldn't compare the return value of filp_open against NULL. You should check whether the call succeeded by using IS_ERR():

/* filp_open() reports failure through an error pointer, not NULL. */
if (IS_ERR(f)) {
    /* Kernel log messages should end with a newline. */
    pr_err("Error opening file\n");
}

I believe filp_open has returned an error pointer and you are trying to dereference it:

/*
 * Calls the file's read operation with buf, a length of 128 and the file's
 * current position.  This dereferences f, so it is only safe once f has
 * been checked with IS_ERR().
 */
f->f_op->read(f, buf, 128, &f->f_pos);

Alternatively, you can use addr2line to find which line has caused the kernel panic



Related Topics



Leave a reply



Submit