Hi!
We have an issue where requests don't seem to be redispatched to another host when we do a graceful shutdown. We have “retries 20” and “option redispatch 3” combined with “balance uri” and “hash-type consistent djb2”. When we close the listening socket of the service, HAProxy seems to spend all 20 retries on the same host and then returns a 503, even though it should retry on another host at the third reconnect attempt because of redispatch.
Wanted behaviour: if the connection is refused, pick another server. It doesn't matter if the choice is random; just pick some other server that works.
A couple of log lines showing this behaviour are included after the config.
Config:
global
log /dev/log len 65535 local0 info alert
log /dev/log len 65535 local1 notice alert
user haproxy
group haproxy
nbproc 1
nbthread 1
maxconn 100000
hard-stop-after 600s # After 10 minutes (clean soft-stop on reloads for mostly tcp connections)
stats socket /var/run/haproxy.sock mode 660 level admin
# server-state-file /var/lib/haproxy/server-state
stats timeout 2m # Wait up to 2 minutes for input
master-worker # Launches one master process and a number of additional worker processes
defaults
log global
mode http
option httplog
timeout connect 3s
timeout client 30s
timeout server 30s
timeout http-request 30s
timeout http-keep-alive 60s
timeout queue 120s
timeout check 10s
retries 20 # Max retry attempts on a single server during connect failures
option redispatch 3 # Allow the redispatch to another server on every Xth retry
option forwardfor # Forward request headers from the original client to the backend
# load-server-state-from-file global
default-server init-addr last,none fastinter 1s rise 2 downinter 1s fall 2 on-error fastinter # Skip DNS resolution on startup (lazy resolution) and aggressive health checking
no option http-server-close # keep backend connections alive
option tcp-smart-connect
option tcp-smart-accept
option splice-auto
errorfile 400 /etc/haproxy/errors/400.http
errorfile 403 /etc/haproxy/errors/403.http
errorfile 408 /etc/haproxy/errors/408.http
errorfile 500 /etc/haproxy/errors/500.http
errorfile 502 /etc/haproxy/errors/502.http
errorfile 503 /etc/haproxy/errors/503.http
errorfile 504 /etc/haproxy/errors/504.http
frontend http-in
bind *:80
log-tag haproxy.requests
maxconn 100000
capture request header User-Agent len 30
capture request header X-Request-ID len 36
capture request header Host len 32
log-format “{"message_type":"HTTP","request_time":"%t","host":"%H","protocol":"http","http_status":%ST,"user_agent":%{+Q}[capture.req.hdr(0)],"unique_id":%{+Q}[capture.req.hdr(1)],"headers":"%hr","endpoint":"%HP","backend":"%b","backend_name":%{+Q}[capture.req.hdr(2)],"http_method":"%HM","upstream_response_time":%Tr,"upstream_connect_time":%Tc,"bytes_read":%B,"sconn":"%sc","bconn":"%bc","fconn":"%fc","upstream_addr":"%si","upstream_port":"%sp","server_name":"%s","source_addr":"%bi","source_port":"%sp","retries":"%rc","bytes_uploaded":%U,"session_duration":%Tt,"termination_state":"%ts","http_query_params":"%HQ","accept_time":%Th,"idle_time":%Ti,"client_time":%TR,"wait_time":%Tw,"download_time":%Td,"active_time":%Ta}”
use_backend configserver if { hdr(Host) -i configserver }
backend configserver
mode http
option allbackups
balance uri
hash-type consistent djb2
hash-balance-factor 150
server configserver-eu-west-1a-1 10.14.66.188:17914 maxconn 200 check backup
server configserver-eu-west-1a-2 10.14.66.188:17978 maxconn 200 check backup
server configserver-eu-west-1a-3 10.14.66.188:17987 maxconn 200 check backup
server configserver-eu-west-1a-4 10.14.75.245:17961 maxconn 200 check backup
server configserver-eu-west-1a-5 10.14.75.245:18000 maxconn 200 check backup
server configserver-eu-west-1b-6 10.14.80.211:16616 maxconn 200 check
server configserver-eu-west-1b-7 10.14.80.211:16625 maxconn 200 check
server configserver-eu-west-1b-8 10.14.92.90:16854 maxconn 200 check
server configserver-eu-west-1b-9 10.14.92.90:16859 maxconn 200 check
Logs:
message_type:HTTP backend:configserver request_time:21/Dec/2018:03:00:52.624 host:i-04785de9a52f8c57f protocol:http http_status:503 user_agent: unique_id: headers:{||configserver} endpoint:/path1 backend_name:configserver http_method:GET upstream_response_time:-1 upstream_connect_time:-1 bytes_read:213 sconn:0 bconn:0 fconn:1 upstream_addr:10.14.80.211 upstream_port:16636 server_name:configserver-eu-west-1b-9 source_addr:10.14.80.211 source_port:16636 retries:20 bytes_uploaded:98 session_duration:26,951 termination_state:SC http_query_params: accept_time:0 idle_time:6,924 client_time:0 wait_time:18,025 download_time:-1 active_time:20,027 environment_type:prod local_ip:10.14.80.211 cluster:media system_timestamp:December 21st 2018, 04:01:19.000 tags.service:configserver tags.host:i-04785de9a52f8c57f tags.cluster:media tags.local_ip:10.14.80.211 logcount:1 @timestamp:December 21st 2018, 04:01:19.574 _id:zhq1zmcBsOLL-aj_9W0r _type:fluentd _index:haproxy-2018.12.21 _score: -
message_type:HTTP backend:configserver request_time:21/Dec/2018:03:00:50.562 host:i-02d86a4420a5ebf1f protocol:http http_status:503 user_agent: unique_id: headers:{||configserver} endpoint:/path2 backend_name:configserver http_method:GET upstream_response_time:-1 upstream_connect_time:-1 bytes_read:213 sconn:0 bconn:0 fconn:1 upstream_addr:10.14.92.90 upstream_port:16867 server_name:configserver-eu-west-1b-12 source_addr:10.14.92.90 source_port:16867 retries:20 bytes_uploaded:97 session_duration:26,027 termination_state:SC http_query_params: accept_time:0 idle_time:6,001 client_time:0 wait_time:18,023 download_time:-1 active_time:20,026 environment_type:prod local_ip:10.14.92.90 cluster:media system_timestamp:December 21st 2018, 04:01:16.000 tags.service:configserver tags.host:i-02d86a4420a5ebf1f tags.cluster:media tags.local_ip:10.14.92.90 logcount:1 @timestamp:December 21st 2018, 04:01:16.590 _id:shW1zmcBiH4YVdlV0fl7 _type:fluentd _index:haproxy-2018.12.21 _score: -
message_type:HTTP backend:configserver request_time:21/Dec/2018:02:56:55.415 host:i-02d86a4420a5ebf1f protocol:http http_status:503 user_agent: unique_id: headers:{||configserver} endpoint:/path3 backend_name:configserver http_method:GET upstream_response_time:-1 upstream_connect_time:-1 bytes_read:213 sconn:0 bconn:0 fconn:1 upstream_addr:10.14.92.90 upstream_port:16831 server_name:configserver-eu-west-1b-7 source_addr:10.14.92.90 source_port:16831 retries:20 bytes_uploaded:95 session_duration:23,147 termination_state:SC http_query_params: accept_time:0 idle_time:3,117 client_time:0 wait_time:18,026 download_time:-1 active_time:20,030 environment_type:prod local_ip:10.14.92.90 cluster:media system_timestamp:December 21st 2018, 03:57:18.000 tags.service:configserver tags.host:i-02d86a4420a5ebf1f tags.cluster:media tags.local_ip:10.14.92.90 logcount:1 @timestamp:December 21st 2018, 03:57:18.603 _id:KhmyzmcBsOLL-aj_TpV- _type:fluentd _index:haproxy-2018.12.21 _score: -
message_type:HTTP backend:configserver request_time:21/Dec/2018:02:56:58.225 host:i-02d86a4420a5ebf1f protocol:http http_status:503 user_agent: unique_id: headers:{||configserver} endpoint:/path4 backend_name:configserver http_method:GET upstream_response_time:-1 upstream_connect_time:-1 bytes_read:213 sconn:0 bconn:0 fconn:1 upstream_addr:10.14.92.90 upstream_port:16831 server_name:configserver-eu-west-1b-8 source_addr:10.14.92.90 source_port:16831 retries:20 bytes_uploaded:99 session_duration:20,041 termination_state:SC http_query_params: accept_time:0 idle_time:1 client_time:0 wait_time:18,038 download_time:-1 active_time:20,040 environment_type:prod local_ip:10.14.92.90 cluster:media system_timestamp:December 21st 2018, 03:57:18.000 tags.service:configserver tags.host:i-02d86a4420a5ebf1f tags.cluster:media tags.local_ip:10.14.92.90 logcount:1 @timestamp:December 21st 2018, 03:57:18.303 _id:2xmyzmcBsOLL-aj_TpR- _type:fluentd _index:haproxy-2018.12.21 _score: -