summaryrefslogtreecommitdiff
path: root/modules/pc/security/kernel.nix
blob: 62b2f284ca9f662d6d3a9943e50d629e51682f9f (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# Kernel hardening for PC hosts: runtime sysctl knobs (boot.kernel.sysctl) and
# kernel command-line parameters (boot.kernelParams).
{ lib, ... }:
{
  boot = {
    # Reference for every key below, grouped by prefix (kernel, fs, net, vm, ...):
    #   https://docs.kernel.org/admin-guide/sysctl/index.html
    #
    # Values are written as integers (or strings), matching how the kernel
    # documents them, rather than as a mix of booleans and integers. Several of
    # these knobs are tri-state (0/1/2), so a boolean would be misleading.
    kernel.sysctl = {
      # The Magic SysRq key is a key combo that allows users connected to the
      # system console of a Linux kernel to perform some low-level commands.
      # Disable it: we do not need it, and it is a potential security concern.
      # mkForce so that this wins over definitions from other modules.
      "kernel.sysrq" = lib.mkForce 0;

      # Restrict ptrace() usage to processes with a pre-defined relationship
      # (e.g., parent/child)
      # FIXME: this breaks game launchers, find a way to launch them with privileges (steam)
      # Wrapping gamescope with the required capabilities was tried and did not
      # solve the issue, so this stays disabled for now.
      # "kernel.yama.ptrace_scope" = 2;

      # Hide kernel pointers (%pK) from everyone, even processes with CAP_SYSLOG.
      "kernel.kptr_restrict" = 2;

      # Disable the bpf() JIT compiler (to eliminate JIT spray attacks)
      "net.core.bpf_jit_enable" = 0;

      # Disable ftrace function tracing
      "kernel.ftrace_enabled" = 0;

      # Avoid kernel memory address exposures via dmesg by restricting it to
      # privileged users (this value can also be set at build time by
      # CONFIG_SECURITY_DMESG_RESTRICT).
      "kernel.dmesg_restrict" = 1;

      # Refuse O_CREAT opens of FIFOs we do not own inside world-writable (1) and
      # also group-writable (2) sticky directories, unless the FIFO is owned by
      # the directory owner. This makes data spoofing attacks in potentially
      # attacker-controlled directories such as /tmp more difficult.
      "fs.protected_fifos" = 2;

      # Same restriction as above, but for regular files: prevents unintended
      # writes to already-created, attacker-controlled files.
      "fs.protected_regular" = 2;

      # Never produce core dumps for setuid (or otherwise privilege-changing)
      # processes.
      "fs.suid_dumpable" = 0;

      # protected_symlinks: only follow symlinks in sticky world-writable
      #   directories when the follower owns the link or the directory owner does.
      # protected_hardlinks: do not let users create hard links to files they do
      #   not own and cannot read and write.
      # Both mitigate link-based time-of-check/time-of-use races.
      "fs.protected_symlinks" = 1;
      "fs.protected_hardlinks" = 1;

      # Disable late module loading. Left off: once set, no module can be loaded
      # until the next reboot.
      # "kernel.modules_disabled" = 1;

      # Disallow profiling at all levels without CAP_SYS_ADMIN
      # NOTE(review): the mainline documentation only describes levels up to 2;
      # level 3 appears to come from a distribution/hardening patch. On kernels
      # without it this presumably behaves like 2 - verify for the kernel in use.
      "kernel.perf_event_paranoid" = 3;

      # Require privileges (CAP_BPF / CAP_SYS_ADMIN) to use bpf().
      # 1 = disabled for unprivileged users and locked until the next reboot.
      "kernel.unprivileged_bpf_disabled" = 1;

      # Prevent boot console kernel log information leaks by lowering all four
      # console log levels.
      "kernel.printk" = "3 3 3 3";

      # Restrict loading TTY line disciplines to the CAP_SYS_MODULE capability to
      # prevent unprivileged attackers from loading vulnerable line disciplines with
      # the TIOCSETD ioctl
      "dev.tty.ldisc_autoload" = 0;

      # Kexec allows replacing the currently running kernel. There may be an edge
      # case where you wish to boot into a different kernel, but I do not require
      # kexec. Disabling it closes a potential security hole in our system.
      "kernel.kexec_load_disabled" = 1;

      # Borrowed from NixOS/nixpkgs. Since the security module does not explain
      # what those options do, it is up to you to educate yourself, dear reader.
      # See:
      #  - <https://docs.kernel.org/admin-guide/sysctl/vm.html#mmap-rnd-bits>
      #  - <https://docs.kernel.org/admin-guide/sysctl/vm.html#mmap-min-addr>
      # NOTE(review): the accepted range of mmap_rnd_bits is architecture
      # dependent - confirm 32 is valid on every host importing this module.
      "vm.mmap_rnd_bits" = 32;
      "vm.mmap_min_addr" = 65536;
    };

    # https://www.kernel.org/doc/html/latest/admin-guide/kernel-parameters.html
    kernelParams = [
      # I'm sure we break hibernation in at least 5 other sections of this config, so
      # let's disable hibernation explicitly. Allowing hibernation makes it possible
      # to replace the booted kernel with a malicious one, akin to kexec. This helps
      # us prevent an attack called "Evil Maid", where an attacker with physical
      # access to the device tampers with it while it is unattended.
      # P.S. I chose to mention "Evil Maid" specifically because it sounds funny.
      # Do not think that is the only attack you are vulnerable to.
      # See: <https://en.wikipedia.org/wiki/Evil_maid_attack>
      "nohibernate"

      # Randomize the kernel stack offset on each syscall to make stack-based
      # attacks on the kernel harder
      "randomize_kstack_offset=on"

      # Disable vsyscalls as they are obsolete and have been replaced with vDSO.
      # vsyscalls are also at fixed addresses in memory, making them a potential
      # target for ROP attacks.
      # Trade-off: really old binaries that still rely on vsyscalls will break.
      "vsyscall=none"

      # Disable slab merging, which significantly increases the difficulty of heap
      # exploitation by preventing overwriting objects from merged caches and by
      # making it harder to influence slab cache layout. This confines most of the
      # exposure of a heap attack to a single cache.
      "slab_nomerge"

      # Disable debugfs which exposes a lot of sensitive information about the
      # kernel. Some programs, such as powertop, use this interface to gather
      # information about the system, but it is not necessary for the system to
      # actually publish those. I can live without it.
      "debugfs=off"

      # Sometimes certain kernel exploits will cause what is known as an "oops".
      # This parameter will cause the kernel to panic on such oopses, thereby
      # preventing those exploits from continuing.
      "oops=panic"

      # Only allow kernel modules that have been signed with a valid key to be
      # loaded, which increases security by making it much harder to load a
      # malicious kernel module
      "module.sig_enforce=1"

      # The kernel lockdown LSM can eliminate many methods that user space code
      # could abuse to escalate to kernel privileges and extract sensitive
      # information. This LSM is necessary to implement a clear security boundary
      # between user space and the kernel
      #  integrity: kernel features that allow userland to modify the running kernel
      #             are disabled
      #  confidentiality: kernel features that allow userland to extract confidential
      #             information from the kernel are also disabled
      # ArchWiki recommends opting in for "integrity"; however, since we avoid
      # modifying the running kernel anyway (by virtue of using NixOS and locking
      # module hot-loading), the stricter confidentiality mode is a better fit.
      "lockdown=confidentiality"

      # enable buddy allocator free poisoning
      #  on: freed memory will be filled with a specific byte pattern
      #      that is unlikely to occur in normal operation.
      #  off (default): page poisoning will be disabled
      "page_poison=on"

      # Randomize the page allocator free lists. Upstream describes this as a
      # performance improvement for direct-mapped memory-side-cache utilization;
      # as a side effect it reduces the predictability of page allocations.
      "page_alloc.shuffle=1"

      # SLUB debugging: F = sanity checks, Z = red zoning, P = poisoning.
      # Originally meant for debugging kernel-level slab issues; used here to
      # catch slab corruption.
      # NOTE(review): on recent kernels (reportedly since 5.14) passing slub_debug
      # on the command line also forces no_hash_pointers, i.e. raw kernel
      # addresses get printed via %p, which works against the pointer-hiding
      # sysctls in this module. Verify whether this trade-off is intended;
      # init_on_alloc=1 / init_on_free=1 are the usual hardening alternative, but
      # check how they interact with page_poison=on before switching.
      "slub_debug=FZP"

      # Mount the root filesystem with noatime: access times (atime) are not
      # updated when files are read. (Updating atime only when it is older than
      # mtime/ctime is what relatime does, not noatime.)
      "rootflags=noatime"

      # Linux security modules to initialise, in order
      "lsm=landlock,lockdown,yama,integrity,apparmor,bpf,tomoyo,selinux"

      # Disable deferred fbcon takeover so fbcon binds to the framebuffer device
      # immediately. Kept to prevent the kernel from blanking plymouth out of the fb.
      "fbcon=nodefer"

      # the format that will be used for integrity audit logs
      #  0 (default): basic integrity auditing messages
      #  1: additional integrity auditing messages
      "integrity_audit=1"
    ];
  };
}