How Does the Linux Kernel Determine the Order of __init Calls

How does the Linux kernel determine the order of __init calls?

All the init magic is implemented in the following files:

  1. include/asm-generic/vmlinux.lds.h
  2. include/linux/init.h
  3. init/main.c

Firstly, look at include/asm-generic/vmlinux.lds.h that contains the following:

 13  *      . = START;
14 * __init_begin = .;
15 * HEAD_TEXT_SECTION
16 * INIT_TEXT_SECTION(PAGE_SIZE)
17 * INIT_DATA_SECTION(...)
18 * PERCPU_SECTION(CACHELINE_SIZE)
19 * __init_end = .;

where INIT_TEXT_SECTION and INIT_DATA_SECTION are defined as follows:

790 #define INIT_TEXT_SECTION(inittext_align)                               \
791 . = ALIGN(inittext_align); \
792 .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { \
793 VMLINUX_SYMBOL(_sinittext) = .; \
794 INIT_TEXT \
795 VMLINUX_SYMBOL(_einittext) = .; \
796 }
797
798 #define INIT_DATA_SECTION(initsetup_align) \
799 .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { \
800 INIT_DATA \
801 INIT_SETUP(initsetup_align) \
802 INIT_CALLS \
803 CON_INITCALL \
804 SECURITY_INITCALL \
805 INIT_RAM_FS \
806 }

Let's look at the INIT_CALLS definition, for example:

628 #define INIT_CALLS_LEVEL(level)                                         \
629 VMLINUX_SYMBOL(__initcall##level##_start) = .; \
630 *(.initcall##level##.init) \
631 *(.initcall##level##s.init)

633 #define INIT_CALLS \
634 VMLINUX_SYMBOL(__initcall_start) = .; \
635 *(.initcallearly.init) \
636 INIT_CALLS_LEVEL(0) \
637 INIT_CALLS_LEVEL(1) \
638 INIT_CALLS_LEVEL(2) \
639 INIT_CALLS_LEVEL(3) \
640 INIT_CALLS_LEVEL(4) \
641 INIT_CALLS_LEVEL(5) \
642 INIT_CALLS_LEVEL(rootfs) \
643 INIT_CALLS_LEVEL(6) \
644 INIT_CALLS_LEVEL(7) \
645 VMLINUX_SYMBOL(__initcall_end) = .;

You can see that this defines the section names, which are marked with .initcall.... All the marked data goes into the __initcall_start .. __initcall_end range.


Now let's look at include/linux/init.h, which contains the following:

44 #define __init          __section(.init.text) __cold notrace
45 #define __initdata __section(.init.data)

And further:

189 #define __define_initcall(level,fn,id) \
190 static initcall_t __initcall_##fn##id __used \
191 __attribute__((__section__(".initcall" level ".init"))) = fn
...
220 #define device_initcall(fn) __define_initcall("6",fn,6)
...
225 #define __initcall(fn) device_initcall(fn)
...
271 /**
272 * module_init() - driver initialization entry point
273 * @x: function to be run at kernel boot time or module insertion
274 *
275 * module_init() will either be called during do_initcalls() (if
276 * builtin) or at module insertion time (if a module). There can only
277 * be one per module.
278 */
279 #define module_init(x) __initcall(x);

So you can see that module_init is defined as __initcall, which is defined as device_initcall, which is defined as __define_initcall("6",fn,6). Six here means the initcall level. See below...


init/main.c contains the following:

711 extern initcall_t __initcall_start[];
712 extern initcall_t __initcall0_start[];
713 extern initcall_t __initcall1_start[];
714 extern initcall_t __initcall2_start[];
715 extern initcall_t __initcall3_start[];
716 extern initcall_t __initcall4_start[];
717 extern initcall_t __initcall5_start[];
718 extern initcall_t __initcall6_start[];
719 extern initcall_t __initcall7_start[];
720 extern initcall_t __initcall_end[];
721
722 static initcall_t *initcall_levels[] __initdata = {
723 __initcall0_start,
724 __initcall1_start,
725 __initcall2_start,
726 __initcall3_start,
727 __initcall4_start,
728 __initcall5_start,
729 __initcall6_start,
730 __initcall7_start,
731 __initcall_end,
732 };
733
734 /* Keep these in sync with initcalls in include/linux/init.h */
735 static char *initcall_level_names[] __initdata = {
736 "early",
737 "core",
738 "postcore",
739 "arch",
740 "subsys",
741 "fs",
742 "device",
743 "late",
744 };
745
746 static void __init do_initcall_level(int level)
747 {
748 extern const struct kernel_param __start___param[], __stop___param[];
749 initcall_t *fn;
750
751 strcpy(static_command_line, saved_command_line);
752 parse_args(initcall_level_names[level],
753 static_command_line, __start___param,
754 __stop___param - __start___param,
755 level, level,
756 &repair_env_string);
757
758 for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++)
759 do_one_initcall(*fn);
760 }
761
762 static void __init do_initcalls(void)
763 {
764 int level;
765
766 for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++)
767 do_initcall_level(level);
768 }

As you can see, do_initcalls simply iterates over all the initcall levels and calls do_initcall_level for each one, which in turn calls do_one_initcall for each of that level's entries.


Note also that the kernel discards all the __init functions after execution, so they don't take up memory once the kernel has finished booting.

That's all.

Change the order that built in kernel drivers get initialized?

initcall ordering is defined here:

http://lxr.free-electrons.com/source/include/linux/init.h#L194

which is, for reference:

/*
* A "pure" initcall has no dependencies on anything else, and purely
* initializes variables that couldn't be statically initialized.
*
* This only exists for built-in code, not for modules.
* Keep main.c:initcall_level_names[] in sync.
*/
#define pure_initcall(fn) __define_initcall(fn, 0)

#define core_initcall(fn) __define_initcall(fn, 1)
#define core_initcall_sync(fn) __define_initcall(fn, 1s)
#define postcore_initcall(fn) __define_initcall(fn, 2)
#define postcore_initcall_sync(fn) __define_initcall(fn, 2s)
#define arch_initcall(fn) __define_initcall(fn, 3)
#define arch_initcall_sync(fn) __define_initcall(fn, 3s)
#define subsys_initcall(fn) __define_initcall(fn, 4)
#define subsys_initcall_sync(fn) __define_initcall(fn, 4s)
#define fs_initcall(fn) __define_initcall(fn, 5)
#define fs_initcall_sync(fn) __define_initcall(fn, 5s)
#define rootfs_initcall(fn) __define_initcall(fn, rootfs)
#define device_initcall(fn) __define_initcall(fn, 6)
#define device_initcall_sync(fn) __define_initcall(fn, 6s)
#define late_initcall(fn) __define_initcall(fn, 7)
#define late_initcall_sync(fn) __define_initcall(fn, 7s)

As module_init is #defined to be device_initcall, a general module with nothing dependent on it gets initialized towards the end of the sequence. To load your module early, you simply change its module_init call to something else that occurs earlier (like subsys_initcall, for example)

Note: just switching the order on things can break other dependencies, and you can get in a catch-22 dependency loop from hell.

module_init() vs. core_initcall() vs. early_initcall()

They determine the initialization order of built-in modules. Drivers will use device_initcall (or module_init; see below) most of the time. Early initialization (early_initcall) is normally used by architecture-specific code to initialize hardware subsystems (power management, DMAs, etc.) before any real driver gets initialized.

Technical stuff for understanding below

Look at init/main.c. After some architecture-specific initialization done by code in arch/<arch>/boot and arch/<arch>/kernel, the portable start_kernel function will be called. Eventually, in the same file, do_basic_setup is called:

/*
* Ok, the machine is now initialized. None of the devices
* have been touched yet, but the CPU subsystem is up and
* running, and memory and process management works.
*
* Now we can finally start doing some real work..
*/
static void __init do_basic_setup(void)
{
cpuset_init_smp();
usermodehelper_init();
shmem_init();
driver_init();
init_irq_proc();
do_ctors();
usermodehelper_enable();
do_initcalls();
}

which ends with a call to do_initcalls:

static initcall_t *initcall_levels[] __initdata = {
__initcall0_start,
__initcall1_start,
__initcall2_start,
__initcall3_start,
__initcall4_start,
__initcall5_start,
__initcall6_start,
__initcall7_start,
__initcall_end,
};

/* Keep these in sync with initcalls in include/linux/init.h */
static char *initcall_level_names[] __initdata = {
"early",
"core",
"postcore",
"arch",
"subsys",
"fs",
"device",
"late",
};

static void __init do_initcall_level(int level)
{
extern const struct kernel_param __start___param[], __stop___param[];
initcall_t *fn;

strcpy(static_command_line, saved_command_line);
parse_args(initcall_level_names[level],
static_command_line, __start___param,
__stop___param - __start___param,
level, level,
&repair_env_string);

for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++)
do_one_initcall(*fn);
}

static void __init do_initcalls(void)
{
int level;

for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++)
do_initcall_level(level);
}

You can see the names above with their associated index: early is 0, core is 1, etc. Each of those __initcall*_start entries point to an array of function pointers which get called one after the other. Those function pointers are the actual modules and built-in initialization functions, the ones you specify with module_init, early_initcall, etc.

What determines which function pointer gets into which __initcall*_start array? The linker does this, using hints from the module_init and *_initcall macros. Those macros, for built-in modules, assign the function pointers to a specific ELF section.

Example with module_init

Considering a built-in module (configured with y in .config), module_init simply expands like this (include/linux/init.h):

#define module_init(x)  __initcall(x);

and then we follow this:

#define __initcall(fn) device_initcall(fn)
#define device_initcall(fn) __define_initcall(fn, 6)

So, now, module_init(my_func) means __define_initcall(my_func, 6). This is __define_initcall:

#define __define_initcall(fn, id) \
static initcall_t __initcall_##fn##id __used \
__attribute__((__section__(".initcall" #id ".init"))) = fn

which means, so far, we have:

static initcall_t __initcall_my_func6 __used
__attribute__((__section__(".initcall6.init"))) = my_func;

Wow, lots of GCC stuff, but it only means that a new symbol is created, __initcall_my_func6, that's put in the ELF section named .initcall6.init, and as you can see, points to the specified function (my_func). Adding all the functions to this section eventually creates the complete array of function pointers, all stored within the .initcall6.init ELF section.

Initialization example

Look again at this chunk:

for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++)
do_one_initcall(*fn);

Let's take level 6, which represents all the built-in modules initialized with module_init. It starts from __initcall6_start, its value being the address of the first function pointer registered within the .initcall6.init section, and ends at __initcall7_start (excluded), incrementing each time by the size of *fn (which is an initcall_t, i.e. a function pointer of type int (*)(void), which is 32-bit or 64-bit depending on the architecture).

do_one_initcall will simply call the function pointed to by the current entry.

Within a specific initialization section, what determines why an initialization function is called before another is simply the order of the files within the Makefiles since the linker will concatenate the __initcall_* symbols one after the other in their respective ELF init. sections.

This fact is actually used in the kernel, e.g. with device drivers (drivers/Makefile):

# GPIO must come after pinctrl as gpios may need to mux pins etc
obj-y += pinctrl/
obj-y += gpio/

tl;dr: the Linux kernel initialization mechanism is really beautiful, albeit highly GCC-dependent.

What is the Linux built-in driver load order?

Built-in drivers won't be loaded, hence built-in. Their initialization functions are called and the drivers are activated when the kernel sets itself up. These init functions are called in init/main.c::do_initcalls(). All init calls are classified in levels, which are defined in initcall_levels and include/linux/init.h.

These levels are actually symbols defined in the linker script (arch/*/kernel/vmlinux.lds.*). At kernel build time, the linker collects all functions marked with module_init() or another *_initcall(), classifies them into levels, puts all functions of the same level together in the same place, and creates something like an array of function pointers.

What do_initcall_level() does at run time is call each function pointed to by the pointers in the array. There is no calling policy in do_initcall_level other than the levels; the order within the array is decided at link time.

So, now you can see that the drivers' initialization order is fixed at link time, but what can you do?

  1. put your init function in the higher level, or
  2. put your device driver at the higher position in Makefile

The first one is clear if you've read the above, i.e., use early_initcall() instead if it is appropriate.

The second one needs a bit more explanation. The reason why the order in a Makefile matters lies in how the current kernel build system and the linker work. To make a long story short, the build system takes all the object files in obj-y and links them together. It is highly environment-dependent, but there is a high probability that the linker places the first object file in obj-y at a lower address, so it is called earlier.

If you just want your driver to be called earlier than other drivers in the same directory, this is the simplest way to do it.

Linux kernel, where is the task_struct (process) initialization

I tried fork, do_fork, init_task but I could not find something except
init_task.h as I said in my first post. Could you please just tell me
either the syscall either the file that I can find the code for
task_struct initialization?

You were already on the right track. Now do_fork() as well as the function behind the SYSCALL_DEFINE0(fork) calls _do_fork(), which calls copy_process(), where the new struct task_struct *p is created by p = dup_task_struct(current, node); - thereafter, copy_process() would be a good place for your additions. All this is in the file kernel/fork.c.

Changing Linux kernel module boot order

I was able to solve the problem. It turns out that in order to make the *_initcall()s work, the module should be statically linked; therefore, I set:

CONFIG_DRM_VC4=y
CONFIG_SND=y
CONFIG_SND_SOC=y

This loses a couple of milliseconds of boot time, but now /dev/fb0 loads at around ~0.3 seconds, rather than ~9 seconds.

init function invocation of drivers compiled into kernel

The init routine of a built-in driver can still use the module_init() macro to declare that entry point. Or the driver can use device_initcall() when the driver would never be compiled as a loadable module. Or to move its initialization very early in the boot sequence, the driver could use subsys_initcall().

In include/linux/init.h the sequence for invoking these init routines is described as:

/* initcalls are now grouped by functionality into separate 
* subsections. Ordering inside the subsections is determined
* by link order.
* For backwards compatibility, initcall() puts the call in
* the device init subsection.
*
* The `id' arg to __define_initcall() is needed so that multiple initcalls
* can point at the same handler without causing duplicate-symbol build errors.
*/

I assume that these subsections for device drivers correspond to the subdirectories within the drivers directory of the Linux kernel source tree, and that the link order is recorded in the built-in.o file of each subdirectory in drivers. So during kernel boot the init routine of each built-in driver is eventually executed by do_initcalls() in init/main.c.

The init routine of the device driver is responsible for probing the system to verify that the HW device actually exists. The driver should not allocate any resources or register any devices when the probe fails.

UPDATE:

Passing the option "initcall_debug" on the kernel command line will cause timing information to be printed to the console for each initcall. initcalls are used to initialize statically linked kernel drivers and subsystems and contribute a significant amount of time to the Linux boot process. The output looks like:

calling  tty_class_init+0x0/0x44 @ 1
initcall tty_class_init+0x0/0x44 returned 0 after 9765 usecs
calling spi_init+0x0/0x90 @ 1
initcall spi_init+0x0/0x90 returned 0 after 9765 usecs

Reference: http://elinux.org/Initcall_Debug



Related Topics



Leave a reply



Submit