Hey,
I’m experiencing a strange connection reset problem and would appreciate some help.
I set up HAProxy on server A to route our HTTPS traffic as well as the TCP traffic for our Postgres DB, and I am seeing intermittent connection problems.
For my scenario I’ve set up two frontends: one listens on port 5432 (mode tcp), the other on port 443 (mode http).
My config looks like:
global
daemon
chroot /var/lib/haproxy
user haproxy
group haproxy
master-worker
stats socket /run/haproxy/admin.sock user haproxy group haproxy mode 660 level admin expose-fd listeners
stats timeout 10s
# Default SSL material locations
ca-base /etc/ssl/certs
crt-base /etc/ssl/private
tune.ssl.default-dh-param 2048
ssl-default-bind-options no-sslv3 no-tlsv10 no-tlsv11 no-tls-tickets
ssl-default-bind-ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384
# See: https://ssl-config.mozilla.org/#server=haproxy&server-version=2.0.3&config=intermediate
# intermediate configuration, tweak to your needs
ssl-default-bind-ciphersuites TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:TLS_CHACHA20_POLY1305_SHA256
ssl-default-server-ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384
ssl-default-server-ciphersuites TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:TLS_CHACHA20_POLY1305_SHA256
ssl-default-server-options no-sslv3 no-tlsv10 no-tlsv11 no-tls-tickets
# curl https://ssl-config.mozilla.org/ffdhe2048.txt > /path/to/dhparam.pem
ssl-dh-param-file /etc/haproxy/dhparam.pem
tune.ssl.cachesize 80000
tune.ssl.lifetime 900
log /dev/log len 4096 local0
log /dev/log len 4096 local1 notice
maxconn 30000 # allow up to 30k parallel connections (needs tuning of limit params)
defaults
mode http
option http-keep-alive
option forwardfor #Enable insertion of the X-Forwarded-For header to requests sent to servers
option redispatch
option http-buffer-request # enable HTTP body logging, needed for body param extraction
retries 3 # per default start 3 retries if server isn't reacting
log global
log-format '{"host":"%H","ident":"haproxy-v2","pid":%pid,"time":"%Tl","cf-connecting-ip":"%[capture.req.hdr(4)]","customerId":"%[capture.req.hdr(5)]","haproxy":{"conn":{"actconn":%ac,"feconn":%fc,"beconn":%bc,"srv_conn":%sc},"queue":{"backend":%bq,"srv":%sq},"time":{"Ta":%Ta,"Tc":%Tc,"Td":%Td,"Th":%Th,"Ti":%Ti,"Tq":%Tq,"TR":%TR,"Tr":%Tr,"Tt":%Tt,"Tw":%Tw},"termination_state":"%tsc","retry":"%rc","proxying":{"client_ip":"%ci","client_port":%cp,"frontend_ip":"%fi","frontend_port":%fp,"backend_name":"%b","frontend_name":"%ft","server_name":"%s","server_ip":"%si","server_port":"%sp"},"ssl":{"version":"%sslv","ciphers":"%sslc"},"request":{"method":"%HM","uri":"%HU","uri-query-string":"%HQ","uri-path":"%HP","protocol":"%HV"},"req-headers":{"user-agent":"%[capture.req.hdr(0)]","referrer":"%[capture.req.hdr(1)]","content-length":"%[capture.req.hdr(2)]","requested-host":"%[capture.req.hdr(3)]"},"response":{"status_code":%ST},"bytes":{"uploaded":%U,"read":%B}}}'
no option log-separate-errors # we need the errors not separately as we have it in ELK stack
option log-health-checks # we want the haproxy checks logged as well
no option logasap # disabled as we want to see all time it needs to fulfill the request
no option dontlognull # disabled as we want to see normal requests as well
timeout connect 5s
timeout queue 15s
timeout client 30s
timeout server 100s
timeout client-fin 30s
timeout http-request 10s
timeout http-keep-alive 10s
timeout tarpit 15s
listen insights-v3-timescaledb-postgres
bind 1.2.3.4:5432
mode tcp
option tcplog
option tcp-check
timeout connect 10s
timeout client 5m
timeout server 5m
server postgres_5432 5.6.7.8:5432 check
frontend http-https-in
mode http
bind 1.2.3.4:443 ssl crt /etc/haproxy/certs/xxx.pem
bind 1.2.3.4:80
# HSTS (63072000 seconds)
# tell the haproxy backends and all services beyond that this is https/http
http-request add-header X-Forwarded-Proto https if { ssl_fc }
http-request add-header X-Forwarded-Proto http if !{ ssl_fc }
# redirect all non-https traffic
#http-response set-header Strict-Transport-Security max-age=63072000
#http-request redirect scheme https if !{ ssl_fc }
# DEFINE ACLS
acl sx_cdn hdr_beg(host) -i cdn
use_backend apache2_www if sx_cdn
# DEFINE BACKENDS
backend apache2_www
server apache2_8081 5.6.7.8:8081 check inter 10s fall 3 rise 1 pool-purge-delay 30s
timeout queue 120s
timeout server 120s
timeout connect 120s
I have a service running on a second server B which connects to HAProxy on server A, both on port 443 for HTTPS and on port 5432 for Postgres.
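The service’s Postgres connection pool (BoneCP) is configured to test idle connections periodically (TCP keep-alive is additionally enabled in the JDBC URL shown further below):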
boneConfig.setAcquireRetryAttempts(10); // set retries before reporting broken connection
boneConfig.setAcquireRetryDelayInMs(2000);
boneConfig.setIdleConnectionTestPeriodInSeconds(30); // check each connection every x seconds
boneConfig.setConnectionTestStatement("SELECT NOW()"); // ..with this test query
boneConfig.setIdleMaxAgeInMinutes(1);
boneConfig.setMaxConnectionAgeInSeconds(110);
boneConfig.setTransactionRecoveryEnabled(true);
This service also sends HTTPS requests to HAProxy on port 443 from time to time.
The problem: after the service is launched, the Postgres connection to HAProxy is established immediately, and from time to time the service sends HTTP requests to HAProxy. After some time the service reports a broken connection:
Exception Unable to open a test connection to the given database. JDBC url = jdbc:postgresql://postgresql.xxxx.com:5432/postgres?useServerPrepStmts=false&cachePrepStmts=false&useUnicode=true&characterEncoding=UTF-8&sessionVariables=group_concat_max_len=16777216&tcpKeepAlive=true&keepalives_idle=60&keepalives_interval=5, Terminating connection pool (set lazyInit to true if you expect to start your database after your app). Original Exception: ------
org.postgresql.util.PSQLException: Connection to postgresql.xxxx.com:5432 refused. Check that the hostname and port are correct and that the postmaster is accepting TCP/IP connections.
I’ve found that the timestamp of this error message correlates exactly with incoming HTTP traffic from the same service.
Could it be that the long-lived Postgres TCP connection from server B to server A is reset by server A because server B occasionally opens a second (HTTP) connection to it?
Please note: not every additional HTTP connection triggers the TCP error shown above; it happens quite rarely, but it does happen (and shouldn’t). It seems to be nondeterministic, or at least it is not obvious why the Postgres connection sometimes breaks when an HTTP connection is made.
Does anyone have an idea how to debug this further?
Thanks for any help!