MySQL swaps on system with more than enough RAM

Tags: mysql, mysql-5.7, numa, percona-server, performance

On my new dual AMD EPYC 7401 24-core server (128 GB RAM), MySQL starts swapping even after a restart with no data in memory.

NUMA

# numactl --hardware
available: 8 nodes (0-7)
node 0 cpus: 0 1 2 3 4 5 48 49 50 51 52 53
node 0 size: 32114 MB
node 0 free: 198 MB
node 1 cpus: 6 7 8 9 10 11 54 55 56 57 58 59
node 1 size: 32232 MB
node 1 free: 124 MB
node 2 cpus: 12 13 14 15 16 17 60 61 62 63 64 65
node 2 size: 0 MB
node 2 free: 0 MB
node 3 cpus: 18 19 20 21 22 23 66 67 68 69 70 71
node 3 size: 0 MB
node 3 free: 0 MB
node 4 cpus: 24 25 26 27 28 29 72 73 74 75 76 77
node 4 size: 32253 MB
node 4 free: 136 MB
node 5 cpus: 30 31 32 33 34 35 78 79 80 81 82 83
node 5 size: 32227 MB
node 5 free: 212 MB
node 6 cpus: 36 37 38 39 40 41 84 85 86 87 88 89
node 6 size: 0 MB
node 6 free: 0 MB
node 7 cpus: 42 43 44 45 46 47 90 91 92 93 94 95
node 7 size: 0 MB
node 7 free: 0 MB
node distances:
node   0   1   2   3   4   5   6   7 
  0:  10  16  16  16  32  32  32  32 
  1:  16  10  16  16  32  32  32  32 
  2:  16  16  10  16  32  32  32  32 
  3:  16  16  16  10  32  32  32  32 
  4:  32  32  32  32  10  16  16  16 
  5:  32  32  32  32  16  10  16  16 
  6:  32  32  32  32  16  16  10  16 
  7:  32  32  32  32  16  16  16  10 

MySQL config

# cat /etc/mysql/my.cnf
# Ansible managed


[client]
port = 3306
socket = /run/mysqld/mysqld.sock
ssl_cert = /etc/mysql/client-cert.pem
ssl_key = /etc/mysql/client-key.pem

[mysqld_safe]
socket = /run/mysqld/mysqld.sock
nice = 0
flush_caches = 1

[mysqld]
user = mysql
socket = /run/mysqld/mysqld.sock
pid_file = /run/mysqld/mysqld.pid
port = 3306
basedir = /usr
datadir = /var/lib/mysql
tmpdir = /tmp
lc_messages_dir = /usr/share/mysql
general_log = 0
general_log_file = /var/log/mysql/mysql.log
log_error = /var/log/mysql/error.log
log_queries_not_using_indexes = 0
slow_query_log = 0
slow_query_log_file = /var/log/mysql/mysql-slow.log
skip_external_locking = 1
skip_name_resolve = 1
max_connections = 1500
back_log = 1024
wait_timeout = 28800
interactive_timeout = 28800
sql_mode = STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION
character_set_server = utf8
collation_server = utf8_general_ci
skip-character-set-client-handshake
init_connect = 'SET collation_connection = utf8_general_ci; SET NAMES utf8;'
default_storage_engine = InnoDB
key_buffer_size = 32M
myisam_recover_options = FORCE,BACKUP
thread_stack = 256K
thread_cache_size = 750
query_cache_type = 0
query_cache_limit = 2M
query_cache_size = 64M
max_allowed_packet = 256M
group_concat_max_len = 256M
tmp_table_size = 256M
max_heap_table_size = 64M
open_files_limit = 65535
innodb_open_files = 8192
table_definition_cache = 8192
table_open_cache = 8192
innodb_buffer_pool_size = 64G
innodb_log_file_size = 4G
innodb_log_buffer_size = 128M
innodb_flush_log_at_trx_commit = 2
innodb_flush_method = O_DIRECT
innodb_thread_concurrency = 0
innodb_write_io_threads = 12
innodb_read_io_threads = 12
innodb_io_capacity = 7500
innodb_numa_interleave = 1
innodb_file_per_table
innodb_stats_on_metadata = 0
innodb_flush_neighbors = 0
performance_schema = 0
ssl_ca = /etc/mysql/ca-cert.pem
ssl_cert = /etc/mysql/server-cert.pem
ssl_key = /etc/mysql/server-key.pem

[mysqldump]
quick
quote-names
max_allowed_packet = 256M

[isamchk]
key_buffer_size = 32M

As far as I can see, some NUMA nodes have no free RAM left; is that the reason for the swapping? How can I prevent this (NUMA already seems to be configured in MySQL)?

And although the machine is swapping, performance seems pretty good. Do I need to worry about MySQL being killed?

Processes using memory

# for i in $(ls -d /proc/[0-9]*) 
> do  
>    out=$(grep Swap $i/status 2>/dev/null)
>    if [ "x$(echo $out | awk '{print $2}')" != "x0" ] && [ "x$(echo $out | awk '{print $2}')" != "x" ]
>    then    
>   echo "$(ps -p $(echo $i | cut -d'/' -f3) \
>          | tail -n 1 | awk '{print $4'}): $(echo $out | awk '{print $2 $3}')" 
>    fi
> done
systemd-journal: 92kB
perl: 1364kB
/usr/bin/pt-dis: 1452kB
sshd: 8kB
bash: 76kB
mysqld: 204436kB
sshd: 4kB
bash: 24kB

htop

Swap setting

vm.swappiness=1

Number of (filled) memory slots

# dmidecode -t memory | grep -i size
    Size: No Module Installed
    Size: No Module Installed
    Size: No Module Installed
    Size: No Module Installed
    Size: 8192 MB
    Size: 8192 MB
    Size: 8192 MB
    Size: 8192 MB
    Size: No Module Installed
    Size: No Module Installed
    Size: 8192 MB
    Size: 8192 MB
    Size: 8192 MB
    Size: 8192 MB
    Size: No Module Installed
    Size: No Module Installed
    Size: No Module Installed
    Size: No Module Installed
    Size: No Module Installed
    Size: No Module Installed
    Size: 8192 MB
    Size: 8192 MB
    Size: 8192 MB
    Size: 8192 MB
    Size: No Module Installed
    Size: No Module Installed
    Size: 8192 MB
    Size: 8192 MB
    Size: 8192 MB
    Size: 8192 MB
    Size: No Module Installed
    Size: No Module Installed

lshw (memory) output

# lshw -class memory
  *-firmware              
       description: BIOS
       vendor: HPE
       physical id: 5
       version: A40
       date: 01/25/2019
       size: 64KiB
       capacity: 15MiB
       capabilities: pci pnp upgrade shadowing escd cdboot bootselect edd int13floppy360 int13floppy1200 int13floppy720 int5printscreen int9keyboard int14serial int17printer int10video acpi usb biosbootspecification netboot uefi
  *-memory:0
       description: System Memory
       physical id: 15
       slot: System board or motherboard
     *-bank:0
          description: DIMM Synchronous [empty]
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: 0
          slot: PROC 1 DIMM 1
          width: 64 bits
     *-bank:1
          description: DIMM Synchronous [empty]
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: 1
          slot: PROC 1 DIMM 2
          width: 64 bits
     *-bank:2
          description: DIMM Synchronous [empty]
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: 2
          slot: PROC 1 DIMM 3
          width: 64 bits
     *-bank:3
          description: DIMM Synchronous [empty]
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: 3
          slot: PROC 1 DIMM 4
          width: 64 bits
     *-bank:4
          description: DIMM Synchronous 2400 MHz (0.4 ns)
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: 4
          slot: PROC 1 DIMM 5
          size: 8GiB
          width: 64 bits
          clock: 2400MHz (0.4ns)
     *-bank:5
          description: DIMM Synchronous 2400 MHz (0.4 ns)
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: 5
          slot: PROC 1 DIMM 6
          size: 8GiB
          width: 64 bits
          clock: 2400MHz (0.4ns)
     *-bank:6
          description: DIMM Synchronous 2400 MHz (0.4 ns)
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: 6
          slot: PROC 1 DIMM 7
          size: 8GiB
          width: 64 bits
          clock: 2400MHz (0.4ns)
     *-bank:7
          description: DIMM Synchronous 2400 MHz (0.4 ns)
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: 7
          slot: PROC 1 DIMM 8
          size: 8GiB
          width: 64 bits
          clock: 2400MHz (0.4ns)
     *-bank:8
          description: DIMM Synchronous [empty]
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: 8
          slot: PROC 1 DIMM 9
          width: 64 bits
     *-bank:9
          description: DIMM Synchronous [empty]
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: 9
          slot: PROC 1 DIMM 10
          width: 64 bits
     *-bank:10
          description: DIMM Synchronous 2400 MHz (0.4 ns)
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: a
          slot: PROC 1 DIMM 11
          size: 8GiB
          width: 64 bits
          clock: 2400MHz (0.4ns)
     *-bank:11
          description: DIMM Synchronous 2400 MHz (0.4 ns)
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: b
          slot: PROC 1 DIMM 12
          size: 8GiB
          width: 64 bits
          clock: 2400MHz (0.4ns)
     *-bank:12
          description: DIMM Synchronous 2400 MHz (0.4 ns)
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: c
          slot: PROC 1 DIMM 13
          size: 8GiB
          width: 64 bits
          clock: 2400MHz (0.4ns)
     *-bank:13
          description: DIMM Synchronous 2400 MHz (0.4 ns)
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: d
          slot: PROC 1 DIMM 14
          size: 8GiB
          width: 64 bits
          clock: 2400MHz (0.4ns)
     *-bank:14
          description: DIMM Synchronous [empty]
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: e
          slot: PROC 1 DIMM 15
          width: 64 bits
     *-bank:15
          description: DIMM Synchronous [empty]
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: f
          slot: PROC 1 DIMM 16
          width: 64 bits
  *-memory:1
       description: System Memory
       physical id: 16
       slot: System board or motherboard
     *-bank:0
          description: DIMM Synchronous [empty]
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: 0
          slot: PROC 2 DIMM 1
          width: 64 bits
     *-bank:1
          description: DIMM Synchronous [empty]
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: 1
          slot: PROC 2 DIMM 2
          width: 64 bits
     *-bank:2
          description: DIMM Synchronous [empty]
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: 2
          slot: PROC 2 DIMM 3
          width: 64 bits
     *-bank:3
          description: DIMM Synchronous [empty]
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: 3
          slot: PROC 2 DIMM 4
          width: 64 bits
     *-bank:4
          description: DIMM Synchronous 2400 MHz (0.4 ns)
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: 4
          slot: PROC 2 DIMM 5
          size: 8GiB
          width: 64 bits
          clock: 2400MHz (0.4ns)
     *-bank:5
          description: DIMM Synchronous 2400 MHz (0.4 ns)
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: 5
          slot: PROC 2 DIMM 6
          size: 8GiB
          width: 64 bits
          clock: 2400MHz (0.4ns)
     *-bank:6
          description: DIMM Synchronous 2400 MHz (0.4 ns)
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: 6
          slot: PROC 2 DIMM 7
          size: 8GiB
          width: 64 bits
          clock: 2400MHz (0.4ns)
     *-bank:7
          description: DIMM Synchronous 2400 MHz (0.4 ns)
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: 7
          slot: PROC 2 DIMM 8
          size: 8GiB
          width: 64 bits
          clock: 2400MHz (0.4ns)
     *-bank:8
          description: DIMM Synchronous [empty]
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: 8
          slot: PROC 2 DIMM 9
          width: 64 bits
     *-bank:9
          description: DIMM Synchronous [empty]
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: 9
          slot: PROC 2 DIMM 10
          width: 64 bits
     *-bank:10
          description: DIMM Synchronous 2400 MHz (0.4 ns)
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: a
          slot: PROC 2 DIMM 11
          size: 8GiB
          width: 64 bits
          clock: 2400MHz (0.4ns)
     *-bank:11
          description: DIMM Synchronous 2400 MHz (0.4 ns)
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: b
          slot: PROC 2 DIMM 12
          size: 8GiB
          width: 64 bits
          clock: 2400MHz (0.4ns)
     *-bank:12
          description: DIMM Synchronous 2400 MHz (0.4 ns)
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: c
          slot: PROC 2 DIMM 13
          size: 8GiB
          width: 64 bits
          clock: 2400MHz (0.4ns)
     *-bank:13
          description: DIMM Synchronous 2400 MHz (0.4 ns)
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: d
          slot: PROC 2 DIMM 14
          size: 8GiB
          width: 64 bits
          clock: 2400MHz (0.4ns)
     *-bank:14
          description: DIMM Synchronous [empty]
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: e
          slot: PROC 2 DIMM 15
          width: 64 bits
     *-bank:15
          description: DIMM Synchronous [empty]
          product: NOT AVAILABLE
          vendor: UNKNOWN
          physical id: f
          slot: PROC 2 DIMM 16
          width: 64 bits
  *-cache:0
       description: L1 cache
       physical id: 81
       slot: L1-Cache
       size: 2304KiB
       capacity: 2304KiB
       clock: 1GHz (1.0ns)
       capabilities: pipeline-burst internal write-back unified
       configuration: level=1
  *-cache:1
       description: L2 cache
       physical id: 82
       slot: L2-Cache
       size: 12MiB
       capacity: 12MiB
       clock: 1GHz (1.0ns)
       capabilities: pipeline-burst internal write-back unified
       configuration: level=2
  *-cache:2
       description: L3 cache
       physical id: 83
       slot: L3-Cache
       size: 64MiB
       capacity: 64MiB
       clock: 1GHz (1.0ns)
       capabilities: pipeline-burst internal write-back unified
       configuration: level=3
  *-cache:3
       description: L1 cache
       physical id: 85
       slot: L1-Cache
       size: 2304KiB
       capacity: 2304KiB
       clock: 1GHz (1.0ns)
       capabilities: pipeline-burst internal write-back unified
       configuration: level=1
  *-cache:4
       description: L2 cache
       physical id: 86
       slot: L2-Cache
       size: 12MiB
       capacity: 12MiB
       clock: 1GHz (1.0ns)
       capabilities: pipeline-burst internal write-back unified
       configuration: level=2
  *-cache:5
       description: L3 cache
       physical id: 87
       slot: L3-Cache
       size: 64MiB
       capacity: 64MiB
       clock: 1GHz (1.0ns)
       capabilities: pipeline-burst internal write-back unified
       configuration: level=3
  *-memory:2 UNCLAIMED
       physical id: 0
  *-memory:3 UNCLAIMED
       physical id: 1

https://jira.percona.com/browse/PS-5947

Best Answer

Nice to meet you, Mischa. We discussed this in person but I thought I'd put some ideas out here for anyone else to reference and we could continue some discussion here. ;)

Looking at your information provided above, the thing that stands out most to me is that you have 8 total NUMA nodes, but only 4 of them have memory attached to them. I don't know too much about the AMD EPYC architecture, so I'm learning in real time, but this is typically a bad configuration, and one that can cause problems.

In particular, I am not sure how Linux's NUMA interleave mode behaves when memory is interleaved across all nodes but not all nodes actually have memory – I think you could easily get into interesting edge cases, either with performance or with, e.g., swapping.

It would be interesting to try a few things and see how the performance and behavior differs:

  • Using numactl to limit mysqld to interleave only on the nodes which actually have memory; this should be, e.g., numactl --interleave=0,1,4,5
  • Adjusting your physical memory configuration to use twice as many DIMMs of half the size. I'd guess you have 8 x 16 GiB DIMMs now, so you could potentially try a configuration with 16 x 8 GiB DIMMs instead, populating all DIMM sockets.
  • Adjusting your physical memory configuration to use twice as much RAM could also be interesting (at least as a test), just populating all sockets with your current size DIMMs.
  • Again assuming you're currently using 8 x 16 GiB DIMMs, adjusting your physical memory configuration to populate one DIMM slot per channel (which is also per-NUMA-node) instead of two DIMMs in a single channel and some channels unpopulated. It looks like AMD recommends this as well in their memory population guidelines for AMD EPYC processors.

I am very interested to hear what the end result of this is.