Hi,
Since upgrading from 1.7.9 to 1.7.10 I’m getting intermittent 504 errors with sR-- for the session termination state. Downgrading back to 1.7.9 fixes the issue. The log for those requests looks like:
%ac 2
%bc 1
%bq 0
%tsc sR--
%TR 0
%Tw 0
%Tc 5
%Tr -1
%Ta 5
%Th 0
%Ti 5874
%B 194
%ac 2
%fc 1
%bc 1
%sc 0
%rc 0
%sq 0
%bq 0
From the point of view of the app on the backend, the request was processed and the answer was sent back in 13ms.
Full haproxy config:
global
node apne-proxy-1c
log-send-hostname apne-proxy-1c
log 127.0.0.1 local5
chroot /var/lib/haproxy
pidfile /var/run/haproxy.pid
maxconn 4000
user haproxy
group haproxy
daemon
# Turn on stats unix socket
stats socket /var/lib/haproxy/stats mode 750 level admin
stats timeout 30s
server-state-base /var/state/haproxy/
server-state-file global
# Tune TLS
tune.ssl.default-dh-param 2048
ssl-default-bind-ciphers ECDHE-RSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-SHA384:ECDHE-RSA-AES256-SHA:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-SHA
ssl-default-bind-options no-sslv3 no-tls-tickets
ssl-default-server-options no-tls-tickets
# The backend servers have self-signed certs
ssl-server-verify none
# Tune compression
maxcompcpuusage 50
tune.comp.maxlevel 3
# Tune buffering to accommodate voice + client_cert
tune.bufsize 32768
tune.maxrewrite 4096
defaults
mode http
# log global
log 127.0.0.1 format rfc5424 local4
option httplog
log-format §apne-proxy-1c§%ci§%cp§%tr§%HM§%[capture.req.hdr(0)]§%HP§%HQ§%HV§%CC§%CS§%[capture.req.hdr(2)]§%[capture.res.hdr(0)]§%ST§%[capture.req.hdr(1)]§%ID§%sslc§%sslv§%[ssl_fc_sni]§%ft§%b§%s§%tsc§%TR,%Tw,%Tc,%Tr,%Ta,%Th,%Ti,%Ts,%B,%ac,%fc,%bc,%sc,%rc,%sq,%bq
option log-separate-errors
option dontlognull
option http-keep-alive
option forwardfor
# Slow POST protection
option http-buffer-request
http-reuse safe
load-server-state-from-file global
retries 3
timeout http-request 10s
timeout queue 1m
timeout connect 10s
timeout client 1m
timeout server 1m
timeout http-keep-alive 10s
timeout check 10s
maxconn 3000
# Enable compression
compression algo gzip
compression type text/html text/plain text/css text/javascript application/javascript application/json application/x-javascript application/xhtml+xml application/xml application/rss+xml application/atom_xml image/svg+xml
peers mypeers
peer apne-proxy-1b 10.29.0.75:1024
peer apne-proxy-1c 10.29.1.247:1024
backend idx-backend
balance roundrobin
# The sessions to the admin portal are sticky
stick-table type string len 32 size 1M expire 600s peers mypeers
stick on req.cook(idx)
stick store-response res.cook(idx)
# Health Check
option httpchk GET /DaonIdentityX/Monitor/View
http-check expect rstring Overall\ Status:\ (OK|WARN)
# Servers
server apne-idx-1b apne-idx-1b.local:8443 ssl check inter 3000
server apne-idx-1c apne-idx-1c.local:8443 ssl check inter 3000
server apne-idx-1b-02 apne-idx-1b-02.local:8443 ssl check inter 3000
server apne-idx-1c-02 apne-idx-1c-02.local:8443 ssl check inter 3000
frontend idx-direct-server-auth-frontend-apne
# Regular TLS
bind 10.29.1.247:443 ssl crt /etc/haproxy/tls/example.com.pem
# tcp current connections, tcp frequency counter, http frequency counter, http err frequency counter
stick-table type ip size 1m expire 30s store gpc0,conn_cur,conn_rate(3s),http_req_rate(10s),http_err_rate(30s) peers mypeers
# Track the connections
tcp-request connection track-sc1 src
# No tcp restrictions
acl no_tcp_restrictions src -f /etc/haproxy/lists/no_tcp_restrictions.txt
# Allowed IPs
acl trusted_ips src -f /etc/haproxy/lists/trusted_ips.txt
# No TCP connection restrictions from inside (RP servers and stuff)
tcp-request connection accept if no_tcp_restrictions or trusted_ips
# Reject over 50 concurrent connections per ip
tcp-request connection reject if { src_conn_cur ge 50 }
# Reject if more than 30 connections in 3 seconds
tcp-request connection reject if { src_conn_rate ge 30 }
# No TCP content restrictions from inside (RP servers and stuff)
tcp-request content accept if no_tcp_restrictions or trusted_ips
# Reject more than 30 http requests in 10 seconds
tcp-request content reject if { src_http_req_rate ge 30 }
# Reject if more than 10 http errors in 30 seconds
tcp-request content reject if { src_http_err_rate ge 10 }
# Cert pinning
rspadd Strict-Transport-Security:\ max-age=31536000;\ includeSubDomains
rspadd Public-Key-Pins:\ pin-sha256=\"enWPCYJtvn0yn99pyDJyBRNyw/iBUohykJd9EcFs+Js=\";\ pin-sha256=\"Sv0HHas+e1t1pseiclwN8WZ1i0h4w1/UxerXa2MKG0o=\";\ max-age=15768000;\ includeSubDomains
# Hide the server info
# 1.7.x bug
#rspidel ^Server:.*$
rspirep ^Server:.*$ Server:\ Hidden
# Delay to catch L7 info
tcp-request inspect-delay 5s
# Capture the host header
capture request header Host len 64
# Capture the user agent header
capture request header User-Agent len 128
# Capture the SOAP action header
capture request header SOAPAction len 128
# Capture the REST correlation ID header
capture response header X-Idx-RequestReference len 64
# Capture the idx cookie
capture cookie idx len 32
# Monitor uri
acl site_dead nbsrv(idx-backend) eq 0
monitor-uri /test
monitor fail if site_dead
# Check the host
acl correct_host hdr_reg(host) -i ^apne.example.com(:443)?$
tcp-request content reject unless correct_host
# IdX endpoints
acl admin_portal path_reg ^(/[a-zA-Z0-9-]+)?/IdentityXServices
acl monitor path_beg /DaonIdentityX/Monitor
acl favicon path /favicon.ico
# Allow the Admin portal
http-request allow if admin_portal
# Allow the monitor page from trusted IPs only
http-request allow if monitor trusted_ips
# Allow the favicon
http-request allow if favicon
# The default deny
http-request deny
# Backend
default_backend idx-backend
I’m not sure whether I’m doing something wrong and was relying on a bug that was fixed in 1.7.10, or whether a bug was introduced in 1.7.10.
Thanks,
Victor