HAProxy community

HAProxy keeps increasing used memory until OOM

Hi,

In the past few days, my HAProxy has suddenly gone a little bit out of control.
It had been running version 1.8.8 for more than a year and worked fine.
But suddenly it keeps eating up all memory, even swap, and gets OOM-killed about every two hours.
I have tried to fine-tune it and upgraded to version 1.9.12,
but still have no luck.

Below are the details:

ulimit

core file size          (blocks, -c) 0
data seg size           (kbytes, -d) unlimited
scheduling priority             (-e) 0
file size               (blocks, -f) unlimited
pending signals                 (-i) 95690
max locked memory       (kbytes, -l) 2000000000
max memory size         (kbytes, -m) 20000000000
open files                      (-n) 1024
pipe size            (512 bytes, -p) 8
POSIX message queues     (bytes, -q) 819200
real-time priority              (-r) 0
stack size              (kbytes, -s) 8192
cpu time               (seconds, -t) unlimited
max user processes              (-u) 95690
virtual memory          (kbytes, -v) unlimited
file locks                      (-x) unlimited

haproxy.cfg

#---------------------------------------------------------------------
# Global settings
#---------------------------------------------------------------------
global
#    log         127.0.0.1 local2

    chroot      /var/lib/haproxy
    pidfile     /tmp/haproxy.pid
    maxconn     8000
#    ulimit-n    100000
    log 	127.0.0.1 	local2
    daemon
    nbproc	2
#    nbthread 	2
        cpu-map 1 0
        cpu-map 2 1

    stats bind-process 1

    # turn on stats unix socket
    stats socket /var/lib/haproxy/stats mode 600 level admin

    debug
    #quiet

    ssl-default-bind-ciphers kEECDH+aRSA+AES:kRSA+AES:+AES256:RC4-SHA:!kEDH:!LOW:!EXP:!MD5:!aNULL:!eNULL
    # ssl-default-bind-options no-sslv3

#---------------------------------------------------------------------
# common defaults that all the 'listen' and 'backend' sections will
# use if not designated in their block
#---------------------------------------------------------------------

defaults
    mode                    http
    log                     global
    option                  dontlognull
    ## option forwardfor       except 127.0.0.0/8
    option		    dontlog-normal
    option                  clitcpka
    option                  redispatch
    option		    http-server-close
    retries                 3
    timeout http-request    10s
    timeout queue           1m
    timeout connect         30s
    # timeout connect       5m
    timeout client          1m
    # timeout client        30m
    timeout server          1m
    maxconn		    3000
    # timeout server        30m
    #timeout http-keep-alive 10s
    #timeout check           10s

# HAProxy Stats - http://10.62.232.27:1900
listen haproxy_stats
    mode http
    bind *:1900
    ###log-format %ci:%cp\ [%t]\ %ft\ %b/%s\ %Tq/%Tw/%Tc/%Tr/%Tt\ %ST\ %B\ %CC\ %CS\ %tsc\ %ac/%fc/%bc/%sc/%rc\ %sq/%bq\ %hr\ %hs\ %{+Q}r\ %ri
#    option httplog
#    log global
    timeout client 1h
    stats enable
    stats hide-version
    stats realm Haproxy\ Statistics
    stats uri /
    stats auth haproxy:haproxy
    stats refresh 2s
    bind-process 1


### incoming frontend
### tcp frontend
frontend tcp-proxy
        bind            *:9996
        mode            tcp
#        log             global
#        maxconn         8000

	tcp-request inspect-delay 3s
	tcp-request content capture req.payload(0,0) len 400000
	tcp-request content capture req.len len 400000
	tcp-request content accept if WAIT_END

	### if match type mgslogging
	acl mgslogging req.payload(0,0) -m reg (\"type\":\"MGS-Logging\")
	use_backend mgslogging_backend if mgslogging

	default_backend dft_backend


### tcp frontend for netuitive
frontend tcp-proxy-netuitive
        bind            *:9993
        mode            tcp
#        log             global
        maxconn         100

	default_backend netuitive_backend

### riemann metrics frontend
frontend metrics-proxy
	bind		*:9994
	mode		tcp
#	log		global
	maxconn		2000

	default_backend default_metrics_backend

### metricbeat frontend
#frontend metricbeat
#        bind            *:9983
#        mode            tcp
#        log             global
#        maxconn         8000
#        timeout client  10800

#        default_backend metricsbeat_backend

### decaf frontend
frontend decaf
        bind            *:9991
        mode            tcp
#        log             global
        maxconn         300

        default_backend decaf_backend


### new bpm frontend
#frontend nifi-bpm
#        bind            *:9981
#        mode            tcp
#        log             global
#        maxconn         300
#        timeout client  10800
#        default_backend nifi_bpm_backend

### iislog
frontend iislog
        bind            *:9990
        mode            tcp
#        log             global
        maxconn         3500
        option clitcpka

        default_backend iislogs_backend

### sqljobs
frontend sqljobs
        bind            *:9986
        mode            tcp
#        log             global
        maxconn         2000
        option clitcpka

        default_backend sqljob_backend

### wineventlog frontend
frontend wineventlog
        bind            *:9982
        mode            tcp
#        log             global
        maxconn         6000
        option clitcpka

#        default_backend wineventlog_backend
	default_backend wineventlog_beat_backend

### http frontend
frontend http-proxy
	bind		*:9983
	mode		http
#	log		global
        option http-server-close

	acl cert_info path_beg /cert_info
	acl cluster_info path_beg /cluster_info
	acl ambari_info path_beg /ambari_info
	use_backend cert_info_http_backend if cert_info
	use_backend cluster_info_http_backend if cluster_info
	use_backend ambari_info_http_backend if ambari_info


### default backend
### Port: 9996
### In Use: Yes
backend dft_backend
        mode            tcp
	balance		roundrobin
	option		tcpka
#	option		tcp-check
        retries         2
	server tng2087 tng2087:9996 check maxconn 1024


### mgslogging_backend
### Port: 9996
### In Use: Yes
backend mgslogging_backend
        mode            tcp
	balance		roundrobin
        option  	tcpka
#	option  	tcp-check
        retries         2
	server tng2087 tng2087:9996 check maxconn 1024


### sqljob_backend
### Port: 9986
### In Use: Yes
backend sqljob_backend
        mode            tcp
        balance         roundrobin
        option          tcpka
        option          srvtcpka
#        option          tcp-check
        retries         2
        server tng2087 tng2087:9986 check maxconn 2048

### cert_info
### Port: 9985
### mode http
### In Use: Yes
backend cert_info_http_backend
	mode		http
        option http-server-close
	balance		roundrobin
	option 		forwardfor
	server	tng1286	tng1286.:9985 check maxconn 128


### cluster_info
### Port: 9986
### mode http
### In Use: Yes
backend cluster_info_http_backend
        mode            http
        balance         roundrobin
        option http-server-close
        option          forwardfor
        server  tng1286 tng1286.:9986 check maxconn 256

### ambari_info
### Port: 9987
### mode http
### In Use: Yes
backend ambari_info_http_backend
        mode            http
        option http-server-close
        balance         roundrobin
        option          forwardfor
        server  tng1286 tng1286.:9987 check maxconn 256



### site_json_backend
### Port: 9980
### In Use: No
#backend site_json_backend
#        mode            tcp
#        balance         roundrobin
#        option  tcpka
#        retries         2
#        server tng2087 10.62.233.69:9980 check maxconn 1024

### wineventlog
### Port: 9982
### In Use: No
#backend wineventlog_backend
#        mode            tcp
#        balance         roundrobin
#        option  tcpka
#        option          tcp-check
#        retries         2
#        balance         roundrobin
#        server tng2087 10.62.233.69:9982 check maxconn 1024
#        server tng2089 tng2089.:9982 check maxconn 1024

### wineventlog beat
### Port: 9982
### In Use: Yes
backend wineventlog_beat_backend
        mode            tcp
        balance         roundrobin
        option  tcpka
#        option          tcp-check
        retries         2
        balance         roundrobin
        server tng2089 tng2089.:9982 check maxconn 2048
        server tng2088 tng2088.:9982 check maxconn 2048

#backend metricsbeat_backend
#        mode            tcp
#        balance         roundrobin
#        option  tcpka
#        option          tcp-check
#        retries         2
#        balance         roundrobin
#        server tng2089 tng2089.:9983 check maxconn 4096


### ELK Beats related
### Port: 9983
### In Use: No
#backend beats_backend
#        mode            tcp
#        balance         roundrobin
#        option  tcpka
#        retries         2
#        server tng2087 10.62.233.69:9983 check maxconn 1024

###  DECAF
### Port: 9991
### In Use: Yes
backend decaf_backend
        mode            tcp
        balance         roundrobin
        option  tcpka
        retries         2
        server tng1286 10.62.232.108:9991 check maxconn 1024

### Netuitive
### Port: 9993
### In Use: Yes
backend netuitive_backend
        mode            tcp
        balance         roundrobin
#        option  tcpka
        retries         2
        server tng2087 10.62.233.80:9993 check maxconn 128

### IISlogs
### Port: 9990
### In Use: Yes
backend iislogs_backend
        mode            tcp
        balance         roundrobin
        option  tcpka
        retries         2
	server tng1806 10.62.233.38:9990 check maxconn 200
	server tng1850 10.62.233.45:9990 check maxconn 200
	server tng1851 10.62.233.46:9990 check maxconn 200

### Site IISlog
### Port: 9987
### In Use: No
backend tcp_iislogs_backend
        mode            tcp
        balance         roundrobin
        option  tcpka
        retries         2
#       server tng2087 10.62.233.69:9987 check maxconn 1024

### syslog
### Port: 9981
### In Use: Yes
backend syslog_backend
        mode            tcp
        balance         roundrobin
        option  tcpka
        retries         2
#       server tng2087 10.62.233.69:9981 check maxconn 1024

### Site http tcp
### Port: 9984
### In Use: Yes
backend http_backend
        mode            tcp
        balance         roundrobin
        option  tcpka
        retries         2
#       server tng2087 10.62.233.69:9981 check maxconn 1024


### default metrics backend
### Port: 9994
### In Use: Yes
backend default_metrics_backend
	mode		tcp
	balance		roundrobin
	option		tcpka
	retries		2
	server tng2089	10.62.233.82:9994 check maxconn 1024


### ELK Production Write Endpoint
listen elasticsearch-production-tcp-9200
	mode	tcp
	bind	*:9200
	option	tcpka
#	option	tcplog
#	option tcp-check
	balance roundrobin
        fullconn 600
        server tng2575 10.62.2.15:9200 check maxconn 512
        server tng2576 10.62.2.16:9200 check maxconn 512
        server tng2577 10.62.2.17:9200 check maxconn 512

### ELK Production Read Endpoint
listen elasticsearch-production-tcp-9203
        mode    tcp
        bind    *:9203
        option  tcpka
#        option  tcplog
#        option tcp-check
        balance roundrobin
        server tng2593 10.62.2.7:9200 check maxconn 200
        server tng2594 10.62.2.24:9200 check maxconn 200

listen elasticsearch-staging-tcp-9201
        mode    tcp
        bind    *:9201
        option  tcpka
#        option  tcplog
#	option tcp-check
        balance roundrobin
        server tng1863 10.62.0.235:9200 check maxconn 100
        server tng1867 10.62.0.236:9200 check maxconn 100
        server tng1868 10.62.0.251:9200 check maxconn 100

show info

Name: HAProxy
Version: 1.9.12
Release_date: 2019/10/24
Nbthread: 1
Nbproc: 1
Process_num: 1
Pid: 11330
Uptime: 0d 2h17m35s
Uptime_sec: 8255
Memmax_MB: 0
PoolAlloc_MB: 1867
PoolUsed_MB: 1847
PoolFailed: 0
Ulimit-n: 16067
Maxsock: 16067
Maxconn: 8000
Hard_maxconn: 8000
CurrConns: 2783
CumConns: 2478322
CumReq: 2478459
MaxSslConns: 0
CurrSslConns: 0
CumSslConns: 0
Maxpipes: 0
PipesUsed: 0
PipesFree: 0
ConnRate: 339
ConnRateLimit: 0
MaxConnRate: 1560
SessRate: 339
SessRateLimit: 0
MaxSessRate: 1560
SslRate: 0
SslRateLimit: 0
MaxSslRate: 0
SslFrontendKeyRate: 0
SslFrontendMaxKeyRate: 0
SslFrontendSessionReuse_pct: 0
SslBackendKeyRate: 0
SslBackendMaxKeyRate: 0
SslCacheLookups: 0
SslCacheMisses: 0
CompressBpsIn: 0
CompressBpsOut: 0
CompressBpsRateLim: 0
ZlibMemUsage: 0
MaxZlibMemUsage: 0
Tasks: 2858
Run_queue: 1
Idle_pct: 95
node: tng2030
Stopping: 0
Jobs: 2797
Unstoppable Jobs: 0
Listeners: 13
ActivePeers: 0
ConnectedPeers: 0
DroppedLogs: 0
BusyPolling: 0

OOM message

Nov  5 08:54:16 tng2030 kernel: keepalived invoked oom-killer: gfp_mask=0x200da, order=0, oom_score_adj=0
Nov  5 08:54:16 tng2030 kernel: keepalived cpuset=/ mems_allowed=0
Nov  5 08:54:16 tng2030 kernel: CPU: 0 PID: 1128 Comm: keepalived Not tainted 3.10.0-327.13.1.el7.x86_64 #1
Nov  5 08:54:16 tng2030 kernel: Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 04/05/2016
Nov  5 08:54:16 tng2030 kernel: ffff880427af6780 00000000e4284760 ffff8800361e7a68 ffffffff816356f4
Nov  5 08:54:16 tng2030 kernel: ffff8800361e7af8 ffffffff8163068f ffff880036070440 ffff880036070458
Nov  5 08:54:16 tng2030 kernel: 0000000000000206 ffff880427af6780 ffff8800361e7ae0 ffffffff8112893f
Nov  5 08:54:16 tng2030 kernel: Call Trace:
Nov  5 08:54:16 tng2030 kernel: [<ffffffff816356f4>] dump_stack+0x19/0x1b
Nov  5 08:54:16 tng2030 kernel: [<ffffffff8163068f>] dump_header+0x8e/0x214
Nov  5 08:54:16 tng2030 kernel: [<ffffffff8112893f>] ? delayacct_end+0x8f/0xb0
Nov  5 08:54:16 tng2030 kernel: [<ffffffff8116ce7e>] oom_kill_process+0x24e/0x3b0
Nov  5 08:54:16 tng2030 kernel: [<ffffffff8116c9e6>] ? find_lock_task_mm+0x56/0xc0
Nov  5 08:54:16 tng2030 kernel: [<ffffffff81088d8e>] ? has_capability_noaudit+0x1e/0x30
Nov  5 08:54:16 tng2030 kernel: [<ffffffff8116d6a6>] out_of_memory+0x4b6/0x4f0
Nov  5 08:54:16 tng2030 kernel: [<ffffffff81173885>] __alloc_pages_nodemask+0xa95/0xb90
Nov  5 08:54:16 tng2030 kernel: [<ffffffff811b792a>] alloc_pages_vma+0x9a/0x140
Nov  5 08:54:16 tng2030 kernel: [<ffffffff81194915>] do_wp_page+0xd5/0x800
Nov  5 08:54:16 tng2030 kernel: [<ffffffff8119719c>] handle_mm_fault+0x65c/0xf50
Nov  5 08:54:16 tng2030 kernel: [<ffffffff81641380>] __do_page_fault+0x150/0x450
Nov  5 08:54:16 tng2030 kernel: [<ffffffff816416a3>] do_page_fault+0x23/0x80
Nov  5 08:54:16 tng2030 kernel: [<ffffffff8163d908>] page_fault+0x28/0x30
Nov  5 08:54:16 tng2030 kernel: Mem-Info:
Nov  5 08:54:16 tng2030 kernel: Node 0 DMA per-cpu:
Nov  5 08:54:16 tng2030 kernel: CPU    0: hi:    0, btch:   1 usd:   0
Nov  5 08:54:16 tng2030 kernel: CPU    1: hi:    0, btch:   1 usd:   0
Nov  5 08:54:16 tng2030 kernel: Node 0 DMA32 per-cpu:
Nov  5 08:54:16 tng2030 kernel: CPU    0: hi:  186, btch:  31 usd: 182
Nov  5 08:54:16 tng2030 kernel: CPU    1: hi:  186, btch:  31 usd:  31
Nov  5 08:54:16 tng2030 kernel: Node 0 Normal per-cpu:
Nov  5 08:54:16 tng2030 kernel: CPU    0: hi:  186, btch:  31 usd:   0
Nov  5 08:54:16 tng2030 kernel: CPU    1: hi:  186, btch:  31 usd:   0
Nov  5 08:54:16 tng2030 kernel: active_anon:5330700 inactive_anon:502607 isolated_anon:0#012 active_file:0 inactive_file:250 isolated_file:0#012 unevictable:40 dirty:0 writeback:0 unstable:0#012 free:28858 slab_reclaimable:6581 slab_unreclaimable:9626#012 mapped:15649 shmem:70152 pagetables:205215 bounce:0#012 free_cma:0
Nov  5 08:54:16 tng2030 kernel: Node 0 DMA free:15860kB min:12kB low:12kB high:16kB active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:15992kB managed:15908kB mlocked:0kB dirty:0kB writeback:0kB mapped:0kB shmem:0kB slab_reclaimable:0kB slab_unreclaimable:16kB kernel_stack:0kB pagetables:0kB unstable:0kB bounce:0kB free_cma:0kB writeback_tmp:0kB pages_scanned:0 all_unreclaimable? yes
Nov  5 08:54:16 tng2030 kernel: lowmem_reserve[]: 0 2817 24056 24056
Nov  5 08:54:16 tng2030 kernel: Node 0 DMA32 free:87180kB min:2320kB low:2900kB high:3480kB active_anon:2096816kB inactive_anon:527352kB active_file:0kB inactive_file:1000kB unevictable:12kB isolated(anon):0kB isolated(file):0kB present:3129216kB managed:2884844kB mlocked:12kB dirty:0kB writeback:0kB mapped:27088kB shmem:73816kB slab_reclaimable:3016kB slab_unreclaimable:4636kB kernel_stack:624kB pagetables:129544kB unstable:0kB bounce:0kB free_cma:0kB writeback_tmp:0kB pages_scanned:1438 all_unreclaimable? yes
Nov  5 08:54:16 tng2030 kernel: lowmem_reserve[]: 0 0 21239 21239
Nov  5 08:54:16 tng2030 kernel: Node 0 Normal free:12392kB min:17508kB low:21884kB high:26260kB active_anon:19225984kB inactive_anon:1483076kB active_file:0kB inactive_file:0kB unevictable:148kB isolated(anon):0kB isolated(file):0kB present:22020096kB managed:21749064kB mlocked:148kB dirty:0kB writeback:0kB mapped:35508kB shmem:206792kB slab_reclaimable:23308kB slab_unreclaimable:33852kB kernel_stack:5584kB pagetables:691316kB unstable:0kB bounce:0kB free_cma:0kB writeback_tmp:0kB pages_scanned:0 all_unreclaimable? yes
Nov  5 08:54:16 tng2030 kernel: lowmem_reserve[]: 0 0 0 0
Nov  5 08:54:16 tng2030 kernel: Node 0 DMA: 1*4kB (U) 0*8kB 1*16kB (U) 1*32kB (U) 1*64kB (U) 1*128kB (U) 1*256kB (U) 0*512kB 1*1024kB (U) 1*2048kB (R) 3*4096kB (M) = 15860kB
Nov  5 08:54:16 tng2030 kernel: Node 0 DMA32: 12*4kB (UEM) 25*8kB (EM) 30*16kB (UEM) 44*32kB (EM) 40*64kB (EM) 24*128kB (EM) 10*256kB (EM) 10*512kB (UEM) 60*1024kB (EM) 5*2048kB (M) 0*4096kB = 87128kB
Nov  5 08:54:16 tng2030 kernel: Node 0 Normal: 1050*4kB (UEM) 324*8kB (UEM) 72*16kB (UEM) 29*32kB (UEM) 27*64kB (UEM) 12*128kB (UEM) 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB = 12136kB
Nov  5 08:54:16 tng2030 kernel: Node 0 hugepages_total=0 hugepages_free=0 hugepages_surp=0 hugepages_size=2048kB
Nov  5 08:54:16 tng2030 kernel: 179994 total pagecache pages
Nov  5 08:54:16 tng2030 kernel: 109581 pages in swap cache
Nov  5 08:54:16 tng2030 kernel: Swap cache stats: add 14172428, delete 14062847, find 8923856/8941358
Nov  5 08:54:16 tng2030 kernel: Free swap  = 0kB
Nov  5 08:54:16 tng2030 kernel: Total swap = 2097148kB
Nov  5 08:54:16 tng2030 kernel: 6291326 pages RAM
Nov  5 08:54:16 tng2030 kernel: 0 pages HighMem/MovableOnly
Nov  5 08:54:16 tng2030 kernel: 128872 pages reserved
Nov  5 08:54:16 tng2030 kernel: Out of memory: Kill process 31022 (haproxy) score 904 or sacrifice child
Nov  5 08:54:16 tng2030 kernel: Killed process 31022 (haproxy) total-vm:417401584kB, anon-rss:22588176kB, file-rss:0kB

I don’t see a reason for this, with just 8000 global maxconn and 2 processes you should not have that much memory usage. Also, you don’t have any bogus tuning like tune.bufsize in there, so I really do not understand what is causing the issue.

Are you sure that the configuration you posted is what is actually running, and it is 1.9.12? You replaced the executable and restarted haproxy fully?

Can you provide the output of haproxy -vv as well as show pools in 30 minutes intervals?

Here is the information:
I can see the Pool caphdr usage keeps increasing.
I would guess something might be wrong on the frontend “tcp-proxy” side.

    Dumping pools usage. Use SIGQUIT to flush them.
  - Pool cache_st (32 bytes) : 3005 allocated (96160 bytes), 613 used, 0 failures, 4 users, @0x7ab340=09 [SHARED]
  - Pool tcpcheck_ru (64 bytes) : 60611 allocated (3879104 bytes), 27946 used, 0 failures, 9 users, @0x7ab140=05 [SHARED]
  - Pool filter (96 bytes) : 1977 allocated (189792 bytes), 1 used, 0 failures, 3 users, @0x7ab3c0=10 [SHARED]
  - Pool uniqueid (128 bytes) : 0 allocated (0 bytes), 0 used, 0 failures, 1 users, @0x7aaec0=00 [SHARED]
  - Pool task (160 bytes) : 7682 allocated (1229120 bytes), 3272 used, 0 failures, 2 users, @0x7ab4c0=12 [SHARED]
  - Pool h1s (192 bytes) : 7609 allocated (1460928 bytes), 3199 used, 0 failures, 3 users, @0x7ab2c0=08 [SHARED]
  - Pool spoe_appctx (224 bytes) : 0 allocated (0 bytes), 0 used, 0 failures, 1 users, @0x7ab0c0=04 [SHARED]
  - Pool h2s (256 bytes) : 0 allocated (0 bytes), 0 used, 0 failures, 1 users, @0x7ab040=03 [SHARED]
  - Pool http_txn (320 bytes) : 1 allocated (320 bytes), 0 used, 0 failures, 3 users, @0x7aaf40=01 [SHARED]
  - Pool connection (416 bytes) : 13247 allocated (5510752 bytes), 6277 used, 0 failures, 2 users, @0x7ab440=11 [SHARED]
  - Pool dns_resolut (480 bytes) : 0 allocated (0 bytes), 0 used, 0 failures, 1 users, @0x7ab1c0=06 [SHARED]
  - Pool dns_answer_ (576 bytes) : 0 allocated (0 bytes), 0 used, 0 failures, 1 users, @0x7ab240=07 [SHARED]
  - Pool stream (992 bytes) : 7609 allocated (7548128 bytes), 3199 used, 0 failures, 1 users, @0x7aafc0=02 [SHARED]
  - Pool requri (1024 bytes) : 0 allocated (0 bytes), 0 used, 0 failures, 1 users, @0x7ab640=15 [SHARED]
  - Pool buffer (16384 bytes) : 4997 allocated (81870848 bytes), 42 used, 0 failures, 1 users, @0x7ab6c0=16 [SHARED]
  - Pool trash (16416 bytes) : 1 allocated (16416 bytes), 1 used, 0 failures, 1 users, @0x7ab740=17
  - Pool caphdr (400032 bytes) : 221640 allocated (2763746560 bytes), 221640 used, 0 failures, 2 users, @0x7ab5c0=14 [SHARED]
Total: 17 pools, 2865548128 bytes allocated, 2773181728 used.

haproxy -vv

HA-Proxy version 1.9.12 2019/10/24 - https://haproxy.org/
Build options :
  TARGET  = linux2628
  CPU     = generic
  CC      = gcc
  CFLAGS  = -O2 -g -fno-strict-aliasing -Wdeclaration-after-statement -fwrapv -Wno-unused-label -Wno-sign-compare -Wno-unused-parameter -Wno-old-style-declaration -Wno-ignored-qualifiers -Wno-clobbered -Wno-missing-field-initializers -Wtype-limits
  OPTIONS = USE_LINUX_TPROXY=1 USE_ZLIB=1 USE_REGPARM=1 USE_THREAD=1 USE_OPENSSL=1 USE_PCRE=1 USE_PCRE_JIT=1 USE_TFO=1

Default settings :
  maxconn = 2000, bufsize = 16384, maxrewrite = 1024, maxpollevents = 200

Built with OpenSSL version : OpenSSL 1.0.2k-fips  26 Jan 2017
Running on OpenSSL version : OpenSSL 1.0.2k-fips  26 Jan 2017
OpenSSL library supports TLS extensions : yes
OpenSSL library supports SNI : yes
OpenSSL library supports : SSLv3 TLSv1.0 TLSv1.1 TLSv1.2
Built with transparent proxy support using: IP_TRANSPARENT IPV6_TRANSPARENT IP_FREEBIND
Built with zlib version : 1.2.7
Running on zlib version : 1.2.7
Compression algorithms supported : identity("identity"), deflate("deflate"), raw-deflate("deflate"), gzip("gzip")
Built with PCRE version : 8.32 2012-11-30
Running on PCRE version : 8.32 2012-11-30
PCRE library supports JIT : yes
Encrypted password support via crypt(3): yes
Built with multi-threading support.

Available polling systems :
      epoll : pref=300,  test result OK
       poll : pref=200,  test result OK
     select : pref=150,  test result OK
Total: 3 (3 usable), will use epoll.

Available multiplexer protocols :
(protocols marked as <default> cannot be specified using 'proto' keyword)
              h2 : mode=HTX        side=FE|BE
              h2 : mode=HTTP       side=FE
       <default> : mode=HTX        side=FE|BE
       <default> : mode=TCP|HTTP   side=FE|BE

Available filters :
  [SPOE] spoe
  [COMP] compression
  [CACHE] cache
  [TRACE] trace

It seems like the Pool caphdr never releases its memory.
It keeps allocating more and more memory.

Dumping pools usage. Use SIGQUIT to flush them.
  - Pool cache_st (32 bytes) : 3005 allocated (96160 bytes), 1100 used, 0 failures, 4 users, @0x7ab340=09 [SHARED]
  - Pool tcpcheck_ru (64 bytes) : 60611 allocated (3879104 bytes), 32881 used, 0 failures, 9 users, @0x7ab140=05 [SHARED]
  - Pool filter (96 bytes) : 1977 allocated (189792 bytes), 69 used, 0 failures, 3 users, @0x7ab3c0=10 [SHARED]
  - Pool uniqueid (128 bytes) : 0 allocated (0 bytes), 0 used, 0 failures, 1 users, @0x7aaec0=00 [SHARED]
  - Pool task (160 bytes) : 7682 allocated (1229120 bytes), 3756 used, 0 failures, 2 users, @0x7ab4c0=12 [SHARED]
  - Pool h1s (192 bytes) : 7609 allocated (1460928 bytes), 3683 used, 0 failures, 3 users, @0x7ab2c0=08 [SHARED]
  - Pool spoe_appctx (224 bytes) : 0 allocated (0 bytes), 0 used, 0 failures, 1 users, @0x7ab0c0=04 [SHARED]
  - Pool h2s (256 bytes) : 0 allocated (0 bytes), 0 used, 0 failures, 1 users, @0x7ab040=03 [SHARED]
  - Pool http_txn (320 bytes) : 1 allocated (320 bytes), 1 used, 0 failures, 3 users, @0x7aaf40=01 [SHARED]
  - Pool connection (416 bytes) : 13247 allocated (5510752 bytes), 7295 used, 0 failures, 2 users, @0x7ab440=11 [SHARED]
  - Pool dns_resolut (480 bytes) : 0 allocated (0 bytes), 0 used, 0 failures, 1 users, @0x7ab1c0=06 [SHARED]
  - Pool dns_answer_ (576 bytes) : 0 allocated (0 bytes), 0 used, 0 failures, 1 users, @0x7ab240=07 [SHARED]
  - Pool stream (992 bytes) : 7609 allocated (7548128 bytes), 3683 used, 0 failures, 1 users, @0x7aafc0=02 [SHARED]
  - Pool requri (1024 bytes) : 0 allocated (0 bytes), 0 used, 0 failures, 1 users, @0x7ab640=15 [SHARED]
  - Pool buffer (16384 bytes) : 4997 allocated (81870848 bytes), 88 used, 0 failures, 1 users, @0x7ab6c0=16 [SHARED]
  - Pool trash (16416 bytes) : 1 allocated (16416 bytes), 1 used, 0 failures, 1 users, @0x7ab740=17
  - Pool caphdr (400032 bytes) : 351190 allocated (3048284608 bytes), 351190 used, 0 failures, 2 users, @0x7ab5c0=14 [SHARED]
Total: 17 pools, 3150086176 bytes allocated, 3059885696 used.

I tried removing that part (below) and that confirmed it.
But I still cannot figure out where the problem is, and I had used this config for 1.5 years without any issue. LUL

### incoming frontend
### tcp frontend
frontend tcp-proxy
        bind            *:9996
        mode            tcp
#        log             global
#        maxconn         8000

	tcp-request inspect-delay 3s
	tcp-request content capture req.payload(0,0) len 400000
	tcp-request content capture req.len len 400000
	tcp-request content accept if WAIT_END

	### if match type mgslogging
	acl mgslogging req.payload(0,0) -m reg (\"type\":\"MGS-Logging\")
	use_backend mgslogging_backend if mgslogging

	default_backend dft_backend

I found the leak. It exists since the first 1.6 (in fact since 1.6-dev2). It is about the captures inside tcp-request rules. If there is no HTTP configured (mode TCP on the proxy or no HTTP sample fetch), these captures are never released.

I will fix it. However, I don’t know why you have this leak now. Probably a change in your configuration.

2 Likes

Actually I was at 1.8.8, then tried upgrading to 1.8.22 and 1.9.12.
None of them worked.
After your fix, which version will it be in?

I pushed a fix and backported it as far as 1.8. It will be part of the next releases (2.0, 1.9 and 1.8). But nothing planned for the moment.

This will be fixed in the next releases, which are 1.8.23, 1.9.13 and 2.0.9. There are no dates yet and it will probably not be very soon.

You can pull those branches from git, use today’s snapshot (for example haproxy-ss-20191108) or apply the patch manually:

Haproxy 1.8:
http://git.haproxy.org/?p=haproxy-1.8.git;a=patch;h=438cfba09b70e88dc3b6ec2ebb7232992d2bf94e

Haproxy 1.9:
http://git.haproxy.org/?p=haproxy-1.9.git;a=patch;h=526c806c389539b529d9a8df24343bbc737495c7

Haproxy 2.0:
http://git.haproxy.org/?p=haproxy-2.0.git;a=patch;h=b6af6b3650a6a1209ba503b6937afef2c08402e8

Cool, I appreciate it.
I have tried it and it works well.
Thank you for the effort.