I know that the system calls are not in the C standard Library. Is there any library (some sort of a system library) where the system calls are?
If there is such a library how is this library linked to the executable program?
dynamic-linkingkernellibrariesshared librarysystem-calls
I know that the system calls are not in the C standard Library. Is there any library (some sort of a system library) where the system calls are?
If there is such a library how is this library linked to the executable program?
Conceptually, a library function is part of your process.
At run-time, your executable code and the code of any libraries (such as libc.so) it depends on, get linked into a single process. So, when you call a function in such a library, it executes as part of your process, with the same resources and privileges. It's the same idea as calling a function you wrote yourself (with possible exceptions like PLT and/or trampoline functions, which you can look up if you care).
Conceptually, a system call is a special interface used to make a call from your code (which is generally unprivileged) to the kernel (which has the right to escalate privileges as necessary).
For example, see the Linux man brk.
When a C program calls malloc
to allocate memory, it is calling a library function in glibc.
If there is already enough space for the allocation inside the process, it can do any necessary heap management and return the memory to the caller.
If not, glibc needs to request more memory from the kernel: it (probably) calls the brk
glibc function, which in turn calls the brk
syscall. Only once control has passed to the kernel, via the syscall, can the global virtual memory state be modified to reserve more memory, and map it into your process' address space.
As @Patrick stated in the comments, you can use the command line tool strace
to produce a dump of the system calls that are made by a program as it runs.
Here's an example showing the command echo hi
being run.
$ strace echo "hi"
execve("/usr/bin/echo", ["echo", "hi"], [/* 94 vars */]) = 0
brk(0) = 0xf73000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f9996cf2000
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=206010, ...}) = 0
mmap(NULL, 206010, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f9996cbf000
close(3) = 0
open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0P\34\242\213?\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=2108632, ...}) = 0
mmap(0x3f8ba00000, 3932768, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x3f8ba00000
mprotect(0x3f8bbb6000, 2097152, PROT_NONE) = 0
mmap(0x3f8bdb6000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1b6000) = 0x3f8bdb6000
mmap(0x3f8bdbc000, 16992, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x3f8bdbc000
close(3) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f9996cbe000
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f9996cbc000
arch_prctl(ARCH_SET_FS, 0x7f9996cbc740) = 0
mprotect(0x606000, 4096, PROT_READ) = 0
mprotect(0x3f8bdb6000, 16384, PROT_READ) = 0
mprotect(0x3f8b820000, 4096, PROT_READ) = 0
munmap(0x7f9996cbf000, 206010) = 0
brk(0) = 0xf73000
brk(0xf94000) = 0xf94000
brk(0) = 0xf94000
open("/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=106055264, ...}) = 0
mmap(NULL, 106055264, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f9990797000
close(3) = 0
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 2), ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f9996cf1000
write(1, "hi\n", 3hi
) = 3
close(1) = 0
munmap(0x7f9996cf1000, 4096) = 0
close(2) = 0
exit_group(0) = ?
+++ exited with 0 +++
You can also increase the details that get output by including the option -s <size>
. I typically will use -s 2000
to get 2000 characters of output per call. Also I'll include the switch -o <file>
to get the output to dump into a file. It's much easier to look at this output after the fact.
$ strace -s 2000 -o strace.log echo "hi"
hi
And here's the file:
$ cat strace.log
execve("/usr/bin/echo", ["echo", "hi"], [/* 94 vars */]) = 0
brk(0) = 0x1061000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f78bdab3000
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=206010, ...}) = 0
mmap(NULL, 206010, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f78bda80000
close(3) = 0
open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0P\34\242\213?\0\0\0@\0\0\0\0\0\0\0\30\" \0\0\0\0\0\0\0\0\0@\0008\0\n\0@\0+\0*\0\6\0\0\0\5\0\0\0@\0\0\0\0\0\0\0@\0\240\213?\0\0\0@\0\240\213?\0\0\0000\2\0\0\0\0\0\0000\2\0\0\0\0\0\0\10\0\0\0\0\0\0\0\3\0\0\0\4\0\0\0\360\26\30\0\0\0\0\0\360\26\270\213?\0\0\0\360\26\270\213?\0\0\0\34\0\0\0\0\0\0\0\34\0\0\0\0\0\0\0\20\0\0\0\0\0\0\0\1\0\0\0\5\0\0\0\0\0\0\0\0\0\0\0\0\0\240\213?\0\0\0\0\0\240\213?\0\0\0\344W\33\0\0\0\0\0\344W\33\0\0\0\0\0\0\0 \0\0\0\0\0\1\0\0\0\6\0\0\0 g\33\0\0\0\0\0 g\333\213?\0\0\0 g\333\213?\0\0\0\240Q\0\0\0\0\0\0@\233\0\0\0\0\0\0\0\0 \0\0\0\0\0\2\0\0\0\6\0\0\0\200\233\33\0\0\0\0\0\200\233\333\213?\0\0\0\200\233\333\213?\0\0\0\360\1\0\0\0\0\0\0\360\1\0\0\0\0\0\0\10\0\0\0\0\0\0\0\4\0\0\0\4\0\0\0p\2\0\0\0\0\0\0p\2\240\213?\0\0\0p\2\240\213?\0\0\0D\0\0\0\0\0\0\0D\0\0\0\0\0\0\0\4\0\0\0\0\0\0\0\7\0\0\0\4\0\0\0 g\33\0\0\0\0\0 g\333\213?\0\0\0 g\333\213?\0\0\0\20\0\0\0\0\0\0\0\220\0\0\0\0\0\0\0\20\0\0\0\0\0\0\0P\345td\4\0\0\0\f\27\30\0\0\0\0\0\f\27\270\213?\0\0\0\f\27\270\213?\0\0\0\\h\0\0\0\0\0\0\\h\0\0\0\0\0\0\4\0\0\0\0\0\0\0Q\345td\6\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\20\0\0\0\0\0\0\0R\345td\4\0\0\0 g\33\0\0\0\0\0 g\333\213?\0\0\0 g\333\213?\0\0\0\3408\0\0\0\0\0\0\3408\0\0\0\0\0\0\1\0\0\0\0\0\0\0\4\0\0\0\24\0\0\0\3\0\0\0GNU\0\206\353)\243\237\22p\240\253\216F\316.Tkn\303R\312\10\4\0\0\0\20\0\0\0\1\0\0\0GNU\0\0\0\0\0\2\0\0\0\6\0\0\0 \0\0\0\0\0\0\0\363\3\0\0\t\0\0\0\0\1\0\0\16\0\0\0\0000\20D\240 \2\1\210\3\346\220\305E\214\0\300\0\10\0\5\200\0`\300\200\0\r\212\f\0\4\20\0\210D2\10.@\210P<, \0162H&\204\300\214\4\10\0\2\2\16\241\254\32\4f\300\0\3002\0\300\0P\1 \201\10\204\v ($\0\4 P\0\20X\200\312DB(\0\6\200\20\30B\0 @\200\0\tP\0Q\212@\20\0\0\0\0\10\0\0\21\20", 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=2108632, ...}) = 0
mmap(0x3f8ba00000, 3932768, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x3f8ba00000
mprotect(0x3f8bbb6000, 2097152, PROT_NONE) = 0
mmap(0x3f8bdb6000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1b6000) = 0x3f8bdb6000
mmap(0x3f8bdbc000, 16992, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x3f8bdbc000
close(3) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f78bda7f000
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f78bda7d000
arch_prctl(ARCH_SET_FS, 0x7f78bda7d740) = 0
mprotect(0x606000, 4096, PROT_READ) = 0
mprotect(0x3f8bdb6000, 16384, PROT_READ) = 0
mprotect(0x3f8b820000, 4096, PROT_READ) = 0
munmap(0x7f78bda80000, 206010) = 0
brk(0) = 0x1061000
brk(0x1082000) = 0x1082000
brk(0) = 0x1082000
open("/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=106055264, ...}) = 0
mmap(NULL, 106055264, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f78b7558000
close(3) = 0
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 2), ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f78bdab2000
write(1, "hi\n", 3) = 3
close(1) = 0
munmap(0x7f78bdab2000, 4096) = 0
close(2) = 0
exit_group(0) = ?
+++ exited with 0 +++
You can include any program or command that you can typically run in your shell as an argument to strace
. It's probably the most useful tool included with Linux in terms of gaining insights into how executables work within your system.
I'm only scratching the surface here, you can instruct strace
to only show system calls or signals too. Check out the man strace
page for more info.
Best Answer
System calls are the API between program or library and the kernel. The implementation of system call is in the kernel. C library wraps the system calls which are CPU-architecture specific and supplies a unified C API so the C code could be moved from one architecture to another.