Linux – Why Are rmdir and Unlink Two Separate System Calls?

directorylinuxrmsystem-calls

Here's something that kept me wondering for a while:

[15:40:50][/tmp]$ mkdir a
[15:40:52][/tmp]$ strace rmdir a
execve("/usr/bin/rmdir", ["rmdir", "a"], [/* 78 vars */]) = 0
brk(0)                                  = 0x11bb000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7ff3772c3000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=245801, ...}) = 0
mmap(NULL, 245801, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7ff377286000
close(3)                                = 0
open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0p\36\3428<\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=2100672, ...}) = 0
mmap(0x3c38e00000, 3924576, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x3c38e00000
mprotect(0x3c38fb4000, 2097152, PROT_NONE) = 0
mmap(0x3c391b4000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1b4000) = 0x3c391b4000
mmap(0x3c391ba000, 16992, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x3c391ba000
close(3)                                = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7ff377285000
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7ff377283000
arch_prctl(ARCH_SET_FS, 0x7ff377283740) = 0
mprotect(0x609000, 4096, PROT_READ)     = 0
mprotect(0x3c391b4000, 16384, PROT_READ) = 0
mprotect(0x3c38c1f000, 4096, PROT_READ) = 0
munmap(0x7ff377286000, 245801)          = 0
brk(0)                                  = 0x11bb000
brk(0x11dc000)                          = 0x11dc000
brk(0)                                  = 0x11dc000
open("/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=106070960, ...}) = 0
mmap(NULL, 106070960, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7ff370d5a000
close(3)                                = 0
rmdir("a")                              = 0
close(1)                                = 0
close(2)                                = 0
exit_group(0)                           = ?
+++ exited with 0 +++
[15:40:55][/tmp]$ touch a
[15:41:16][/tmp]$ strace rm a
execve("/usr/bin/rm", ["rm", "a"], [/* 78 vars */]) = 0
brk(0)                                  = 0xfa8000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f3b2388a000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=245801, ...}) = 0
mmap(NULL, 245801, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f3b2384d000
close(3)                                = 0
open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0p\36\3428<\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=2100672, ...}) = 0
mmap(0x3c38e00000, 3924576, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x3c38e00000
mprotect(0x3c38fb4000, 2097152, PROT_NONE) = 0
mmap(0x3c391b4000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1b4000) = 0x3c391b4000
mmap(0x3c391ba000, 16992, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x3c391ba000
close(3)                                = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f3b2384c000
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f3b2384a000
arch_prctl(ARCH_SET_FS, 0x7f3b2384a740) = 0
mprotect(0x60d000, 4096, PROT_READ)     = 0
mprotect(0x3c391b4000, 16384, PROT_READ) = 0
mprotect(0x3c38c1f000, 4096, PROT_READ) = 0
munmap(0x7f3b2384d000, 245801)          = 0
brk(0)                                  = 0xfa8000
brk(0xfc9000)                           = 0xfc9000
brk(0)                                  = 0xfc9000
open("/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=106070960, ...}) = 0
mmap(NULL, 106070960, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f3b1d321000
close(3)                                = 0
ioctl(0, SNDCTL_TMR_TIMEBASE or SNDRV_TIMER_IOCTL_NEXT_DEVICE or TCGETS, {B38400 opost isig icanon echo ...}) = 0
newfstatat(AT_FDCWD, "a", {st_mode=S_IFREG|0664, st_size=0, ...}, AT_SYMLINK_NOFOLLOW) = 0
geteuid()                               = 1000
newfstatat(AT_FDCWD, "a", {st_mode=S_IFREG|0664, st_size=0, ...}, AT_SYMLINK_NOFOLLOW) = 0
faccessat(AT_FDCWD, "a", W_OK)          = 0
unlinkat(AT_FDCWD, "a", 0)              = 0
lseek(0, 0, SEEK_CUR)                   = -1 ESPIPE (Illegal seek)
close(0)                                = 0
close(1)                                = 0
close(2)                                = 0
exit_group(0)                           = ?
+++ exited with 0 +++

Why are there separate system calls for removing a directory and files? Why would these two operations be semantically distinct?

Best Answer

Directories are special in the sense that within a directory you can have references to several files and directories, so, if you remove the parent directory, all those files lose their reference point from where they can be accessed, the same with process. For such cases, rmdir() have different checks, that are different from unlink():

  • If the directory is not empty. If a directory is not empty it can't remove it until the contents are unlink'd/removed.

       ENOTEMPTY
          pathname contains entries other than . and .. ; or, pathname has
          ..  as its final component.  POSIX.1-2001 also allows EEXIST for
          this condition.
    
  • If the directory is in use. If a process losses their current directory, it could lead to problems and undefined behaviors. Is better to prevent them.

       EBUSY  pathname  is currently in use by the system or some process that
          prevents its removal.  On Linux this means pathname is currently
          used  as  a  mount point or is the root directory of the calling
          process.
    

In the case of unlink() these checks doesn't exist. In fact, you can delete the name of a file with unlink() and the process that is still using/making reference to it, can modify it without problems. The file exist until the file descriptor exist, just unaccessible to new process (unless you know where to search). This is part of the rainbow-colored-hands magic of the *NIX file systems.

Now, there's the unlinkat() which behaves as both, unlink() or rmdir(2) depending the path which is what you expect.

Related Question