Hello,
I’m using HA-Proxy version 2.0.13-1~bpo10+1 2020/02/15 on Debian 10.
We have configured the following setup:
global
daemon
chroot /var/lib/haproxy
user haproxy
group haproxy
master-worker
stats socket /run/haproxy/admin.sock user haproxy group haproxy mode 660 level admin expose-fd listeners
stats timeout 10s
# Default SSL material locations
ca-base /etc/ssl/certs
crt-base /etc/ssl/private
tune.ssl.default-dh-param 2048
ssl-default-bind-options no-sslv3 no-tlsv10 no-tlsv11 no-tls-tickets
ssl-default-bind-ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384
# See: https://ssl-config.mozilla.org/#server=haproxy&server-version=2.0.3&config=intermediate
# intermediate configuration, tweak to your needs
ssl-default-bind-ciphersuites TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:TLS_CHACHA20_POLY1305_SHA256
ssl-default-server-ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384
ssl-default-server-ciphersuites TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:TLS_CHACHA20_POLY1305_SHA256
ssl-default-server-options no-sslv3 no-tlsv10 no-tlsv11 no-tls-tickets
ssl-dh-param-file /etc/haproxy/dhparam.pem
tune.ssl.cachesize 80000
tune.ssl.lifetime 900
log /dev/log len 4096 local0
log /dev/log len 4096 local1 notice
defaults
mode http
option http-server-close #Setting "option http-server-close" enables HTTP connection-close mode on the server side while keeping the ability to support HTTP keep-alive and pipelining on the client side. This provides the lowest latency on the client side (slow network) and the fastest session reuse on the server side to save server resources
option forwardfor #Enable insertion of the X-Forwarded-For header to requests sent to servers
timeout http-keep-alive 15s #Set the maximum allowed time to wait for a new HTTP request to appear
option redispatch #if a server designated by a cookie is down, clients may definitely stick to it because they cannot flush the cookie, so they will not be able to access the service anymore. Specifying "option redispatch" will allow the proxy to break cookie
option http-buffer-request # enable HTTP body logging, needed for body param extraction
retries 3 # per default start 3 retries if server isn't reacting
log global
log-format '{"host":"%H","ident":"haproxy-v2","pid":%pid,"time":"%Tl","customerId":"%[capture.req.hdr(4)]","haproxy":{"conn":{"actconn":%ac,"feconn":%fc,"beconn":%bc,"srv_conn":%sc},"queue":{"backend":%bq,"srv":%sq},"time":{"Tq":%Tq,"Tw":%Tw,"Tc":%Tc,"Tr":%Tr,"Tt":%Tt,"Ta":%Ta},"termination_state":"%tsc","retries":%rc,"proxying":{"client_ip":"%ci","client_port":%cp,"frontend_ip":"%fi","frontend_port":%fp,"backend_name":"%b","frontend_name":"%ft","server_name":"%s","server_ip":"%si","server_port":"%sp"},"ssl":{"version":"%sslv","ciphers":"%sslc"},"request":{"method":"%HM","uri":"%HU","uri-query-string":"%HQ","uri-path":"%HP","protocol":"%HV"},"req-headers":{"user-agent":"%[capture.req.hdr(0)]","referrer":"%[capture.req.hdr(1)]","content-length":"%[capture.req.hdr(2)]","requested-host":"%[capture.req.hdr(3)]"},"response":{"status_code":%ST},"bytes":{"uploaded":%U,"read":%B}}}'
no option log-separate-errors # we need the errors not separately as we have it in ELK stack
option log-health-checks # we want the haproxy checks logged as well
no option logasap # disabled as we want to see all time it needs to fulfill the request
no option dontlognull # disabled as we want to see normal requests as well
timeout connect 20s
timeout queue 30s
timeout client 60s
timeout server 60s
timeout client-fin 30s
timeout http-request 20s
timeout http-keep-alive 75s
timeout tarpit 15s
errorfile 400 /etc/haproxy/errors/400.http
errorfile 403 /etc/haproxy/errors/403.http
errorfile 408 /etc/haproxy/errors/408.http
errorfile 500 /etc/haproxy/errors/500.http
errorfile 502 /etc/haproxy/errors/502.http
errorfile 503 /etc/haproxy/errors/503.http
errorfile 504 /etc/haproxy/errors/504.http
### DEFINE FRONTENDS
# this is our general frontend
frontend http-https-in
mode http
bind <haproxy-server>:443 ssl crt /etc/haproxy/certs/10.2018..com.pem crt /etc/haproxy/certs/.pem crt /etc/haproxy/certs/.com.pem
bind <haproxy-server>:80
# CAPTURE DATA FROM REQUESTS FOR LOGGING
capture request header User-Agent len 256 # for user agent
capture request header Referer len 256 # for referrer
capture request header Content-Length len 10 # for content length of POST (does this also work for PUT?)
capture request header Host len 128 # for the host, subdomain, etc
http-request capture req.body_param(customerId) len 4
# HSTS (63072000 seconds)
# tell the haproxy backends and all services beyond that this is https/http
http-request add-header X-Forwarded-Proto https if { ssl_fc }
http-request add-header X-Forwarded-Proto http if !{ ssl_fc }
# redirect all non-https traffic
http-response set-header Strict-Transport-Security max-age=63072000
http-request redirect scheme https if !{ ssl_fc }
# DEFINE ACLS
...
# DEFINE ACL TO BACKEND
....
# DEFINE BACKENDS
backend backend01
balance roundrobin
option httpchk GET /healthcheck
http-check expect rstatus (2|3)[0-9][0-9]
server server1 <server-1-IP>:8585 check inter 10s fastinter 5s fall 2 rise 1 observe layer7 on-error mark-down error-limit 10 pool-purge-delay 30s
server server2 <server-2-IP>:8585 check inter 10s fastinter 5s fall 2 rise 1 observe layer7 on-error mark-down error-limit 10 pool-purge-delay 30s backup
The problem we’re facing are DOWNs and UPs for the configured server1 and server2 without them being really down. I see in the haproxy logs entries like:
Mar 19 21:10:08 haproxy-server haproxy[3203]: [WARNING] 078/211008 (3204) : Health check for server backend01/server1 failed, reason: Layer7 timeout, check duration: 10000ms, status: 0/1 DOWN.
Mar 19 21:10:09 haproxy-server haproxy[3203]: [WARNING] 078/211008 (3204) : Server backend01/server1 is DOWN. 0 active and 1 backup servers left. Running on backup. 1 sessions active, 0 requeued, 0 remaining in queue.
Mar 19 21:10:27 haproxy-server haproxy[3203]: [WARNING] 078/211027 (3204) : Health check for server backend01/server1 succeeded, reason: Layer7 check passed, code: 200, info: "HTTP status check returned code <3C>200<3E>", check duration: 8009ms, status: 2/2 UP.
Mar 19 21:10:27 haproxy-server haproxy[3203]: [WARNING] 078/211027 (3204) : Server backend01/server1 is UP. 1 active and 1 backup servers online. 0 sessions requeued, 0 total in queue.
Mar 19 21:10:47 haproxy-server haproxy[3203]: [WARNING] 078/211047 (3204) : Health check for server backend01/server1 failed, reason: Layer7 timeout, check duration: 10001ms, status: 1/2 UP.
Mar 19 21:11:02 haproxy-server haproxy[3203]: [WARNING] 078/211102 (3204) : Health check for server backend01/server1 failed, reason: Layer7 timeout, check duration: 10000ms, status: 0/1 DOWN.
Mar 19 21:11:02 haproxy-server haproxy[3203]: [WARNING] 078/211102 (3204) : Server backend01/server1 is DOWN. 0 active and 1 backup servers left. Running on backup. 0 sessions active, 0 requeued, 0 remaining in queue.
Mar 19 21:12:17 haproxy-server haproxy[3203]: [WARNING] 078/211217 (3204) : Health check for server backend01/server1 succeeded, reason: Layer7 check passed, code: 200, info: "HTTP status check returned code <3C>200<3E>", check duration: 5006ms, status: 2/2 UP.
Mar 19 21:12:17 haproxy-server haproxy[3203]: [WARNING] 078/211217 (3204) : Server backend01/server1 is UP. 1 active and 1 backup servers online. 0 sessions requeued, 0 total in queue.
Mar 19 21:28:17 haproxy-server haproxy[3203]: [WARNING] 078/212817 (3204) : Health check for server backend01/server1 failed, reason: Layer7 timeout, check duration: 10001ms, status: 1/2 UP.
Mar 19 21:28:32 haproxy-server haproxy[3203]: [WARNING] 078/212832 (3204) : Health check for server backend01/server1 failed, reason: Layer7 timeout, check duration: 10001ms, status: 0/1 DOWN.
Mar 19 21:28:32 haproxy-server haproxy[3203]: [WARNING] 078/212832 (3204) : Server backend01/server1 is DOWN. 0 active and 1 backup servers left. Running on backup. 1 sessions active, 0 requeued, 0 remaining in queue.
Mar 19 21:28:47 haproxy-server haproxy[3203]: [WARNING] 078/212847 (3204) : Health check for server backend01/server1 succeeded, reason: Layer7 check passed, code: 200, info: "HTTP status check returned code <3C>200<3E>", check duration: 5006ms, status: 2/2 UP.
Mar 19 21:28:47 haproxy-server haproxy[3203]: [WARNING] 078/212847 (3204) : Server backend01/server1 is UP. 1 active and 1 backup servers online. 0 sessions requeued, 0 total in queue.
Mar 19 21:36:07 haproxy-server haproxy[3203]: [WARNING] 078/213607 (3204) : Health check for server backend01/server1 failed, reason: Layer7 timeout, check duration: 10001ms, status: 1/2 UP.
Mar 19 21:36:12 haproxy-server haproxy[3203]: [WARNING] 078/213612 (3204) : Health check for server backend01/server1 succeeded, reason: Layer7 check passed, code: 200, info: "HTTP status check returned code <3C>200<3E>", check duration: 1ms, status: 2/2 UP.
Mar 19 21:40:27 haproxy-server haproxy[3203]: [WARNING] 078/214027 (3204) : Health check for server backend01/server1 failed, reason: Layer7 timeout, check duration: 10000ms, status: 1/2 UP.
Mar 19 21:40:42 haproxy-server haproxy[3203]: [WARNING] 078/214042 (3204) : Health check for server backend01/server1 failed, reason: Layer7 timeout, check duration: 10001ms, status: 0/1 DOWN.
Mar 19 21:40:42 haproxy-server haproxy[3203]: [WARNING] 078/214042 (3204) : Server backend01/server1 is DOWN. 0 active and 1 backup servers left. Running on backup. 0 sessions active, 0 requeued, 0 remaining in queue.
Mar 19 21:43:20 haproxy-server haproxy[3203]: [WARNING] 078/214320 (3204) : Health check for server backend01/server1 succeeded, reason: Layer7 check passed, code: 200, info: "HTTP status check returned code <3C>200<3E>", check duration: 8006ms, status: 2/2 UP.
Mar 19 21:43:20 haproxy-server haproxy[3203]: [WARNING] 078/214320 (3204) : Server backend01/server1 is UP. 1 active and 1 backup servers online. 0 sessions requeued, 0 total in queue.
Mar 19 21:56:06 haproxy-server haproxy[3203]: [WARNING] 078/215606 (3204) : Health check for server backend01/server1 failed, reason: Layer7 timeout, check duration: 10001ms, status: 1/2 UP.
Mar 19 21:56:21 haproxy-server haproxy[3203]: [WARNING] 078/215621 (3204) : Health check for server backend01/server1 failed, reason: Layer7 timeout, check duration: 10001ms, status: 0/1 DOWN.
Mar 19 21:56:21 haproxy-server haproxy[3203]: [WARNING] 078/215621 (3204) : Server backend01/server1 is DOWN. 0 active and 1 backup servers left. Running on backup. 0 sessions active, 0 requeued, 0 remaining in queue.
Mar 19 21:58:11 haproxy-server haproxy[3203]: [WARNING] 078/215811 (3204) : Health check for server backend01/server1 succeeded, reason: Layer7 check passed, code: 200, info: "HTTP status check returned code <3C>200<3E>", check duration: 1ms, status: 2/2 UP.
Mar 19 21:58:11 haproxy-server haproxy[3203]: [WARNING] 078/215811 (3204) : Server backend01/server1 is UP. 1 active and 1 backup servers online. 0 sessions requeued, 0 total in queue.
Mar 19 22:10:09 haproxy-server haproxy[3203]: [WARNING] 078/221009 (3204) : Health check for server backend01/server1 failed, reason: Layer7 timeout, check duration: 10000ms, status: 1/2 UP.
Mar 19 22:10:24 haproxy-server haproxy[3203]: [WARNING] 078/221024 (3204) : Health check for server backend01/server1 failed, reason: Layer7 timeout, check duration: 10001ms, status: 0/1 DOWN.
Mar 19 22:10:24 haproxy-server haproxy[3203]: [WARNING] 078/221024 (3204) : Server backend01/server1 is DOWN. 0 active and 1 backup servers left. Running on backup. 0 sessions active, 0 requeued, 0 remaining in queue.
Mar 19 22:13:39 haproxy-server haproxy[3203]: [WARNING] 078/221339 (3204) : Health check for server backend01/server1 succeeded, reason: Layer7 check passed, code: 200, info: "HTTP status check returned code <3C>200<3E>", check duration: 5006ms, status: 2/2 UP.
Mar 19 22:13:39 haproxy-server haproxy[3203]: [WARNING] 078/221339 (3204) : Server backend01/server1 is UP. 1 active and 1 backup servers online. 0 sessions requeued, 0 total in queue.
But I also ran a separate script on the same machine hammering the server1 API with 5 requests/s for more than 2h and none of the ~31k requests was getting something other back than HTTP status code 200.
Did anyone experience something similar?