We have an issue where Suricata does not always cleanly recover from memory pressure on the instance.
Background:
Suricata is installed on a number of internet-facing hosts doing IPS.
We have an overnight malware scan (ClamAV, at approx. 02:30 UTC) that runs on the AWS instances running Suricata.
Issue:
When this runs it does not use all the available RAM, but it does cause the Ubuntu 18.04 Linux kernel to move some of Suricata's RSS memory to swap (RSS usage drops from about 250 MB to approx. 50 MB).
This will then recover: Suricata moves its RSS back up to 250 MB and works normally when traffic picks up in the morning. However, after a number of days of this, Suricata will sometimes stop processing traffic and the websites become unavailable.
Sometimes Suricata recovers and keeps working; other times we see issues later in the day where Suricata stops passing any traffic through.
There are no log messages in suricata.log / drop.log / fast.log.
A stop/start of Suricata will not always clear the issue either, which seems odd.
I have to restart the instance for the issue to be resolved, which then leads back into the cycle above.
Wondering if anyone else has seen this before I raise it as a bug.
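In case it is useful, this is roughly how I have been confirming the swap-out after the scan (a quick sketch; it assumes a single Suricata process, so pidof returns one PID):

$ pid=$(pidof suricata)                        # PID of the running Suricata
$ grep -E 'VmRSS|VmSwap' /proc/"$pid"/status   # resident vs. swapped-out memory
$ dmesg -T | grep -iE 'oom|out of memory'      # did the OOM killer fire overnight?

VmSwap climbing while VmRSS falls matches the 250 MB -> 50 MB drop described above; any OOM killer lines in dmesg would point at a different problem.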
What version of Suricata are you running?
How much memory in total is available?
Can you share the config as well?
Hi Andreas,
Thanks for the response; details below.
Apologies for the rubbish formatting.
Suricata 6.0.2
Memory total is:

$ free -m
              total        used        free      shared  buff/cache   available
Mem:           7666        4356         802           2        2507        3011
Swap:          9215         534        8681
The clamscan command was run with the following limits:
$ ionice -c idle nice -n 10 /usr/bin/clamscan -ir --exclude=^/sys --exclude=^/proc --exclude=^/etc/snort/rules --exclude=^/etc/suricata/rules/ --tempdir=/mnt/tmp/clamav --no-summary --max-filesize=1024M --max-scansize=2048M
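A mitigation we are considering but have not tested yet (the 2G cap is an arbitrary starting point, not a tuned value) is to make the kernel less eager to swap and to run the scan in its own cgroup so its footprint is bounded:

$ sudo sysctl vm.swappiness=10
$ sudo systemd-run --scope -p MemoryMax=2G -p IOSchedulingClass=idle \
    /usr/bin/clamscan -ir --tempdir=/mnt/tmp/clamav ...   # same flags as the command above

This bounds what clamscan itself can use; it may not completely stop the kernel swapping Suricata out under page-cache pressure.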
Config is:
%YAML 1.1
---
vars:
  address-groups:
    HOME_NET: '[0.0.0.0/0]'
    EXTERNAL_NET: '[0.0.0.0/0]'
    HTTP_SERVERS: $HOME_NET
    SMTP_SERVERS: $HOME_NET
    SQL_SERVERS: $HOME_NET
    DNS_SERVERS: $HOME_NET
    TELNET_SERVERS: $HOME_NET
    AIM_SERVERS: $EXTERNAL_NET
    DNP3_SERVER: $HOME_NET
    DNP3_CLIENT: $HOME_NET
    MODBUS_CLIENT: $HOME_NET
    MODBUS_SERVER: $HOME_NET
    ENIP_CLIENT: $HOME_NET
    ENIP_SERVER: $HOME_NET
  port-groups:
    HTTP_PORTS: '[80,5080]'
    SHELLCODE_PORTS: '!80'
    ORACLE_PORTS: 1521
    SSH_PORTS: 22
    DNP3_PORTS: 20000
    MODBUS_PORTS: 502
default-rule-path: /etc/snort/rules
rule-files:
  - local.rules
  - attack-responses.rules
  - backdoor.rules
  - bad-traffic.rules
  - community-bot.rules
  - community-dos.rules
  - community-game.rules
  - community-inappropriate.rules
  - community-misc.rules
  - community-sql-injection.rules
  - community-virus.rules
  - community-icmp.rules
  - community-web-attacks.rules
  - community-web-cgi.rules
  - community-web-misc.rules
  - dns.rules
  - icmp-info.rules
  - icmp.rules
  - mysql.rules
  - p2p.rules
  - policy.rules
  - rpc.rules
  - rservices.rules
  - scan.rules
  - shellcode.rules
  - sql.rules
  - web-attacks.rules
  - web-cgi.rules
  - web-misc.rules
  - botcc.portgrouped.rules
  - emerging-attack_response.rules
  - emerging-icmp_info.rules
  - emerging-rpc.rules
  - emerging-scan.rules
  - emerging-shellcode.rules
  - emerging-exploit.rules
  - emerging-info.rules
  - emerging-current_events.rules
  - emerging-icmp.rules
  - emerging-dns.rules
  - emerging-dos.rules
  - emerging-p2p.rules
  - emerging-sql.rules
  - emerging-web_server.rules
  - emerging-worm.rules
  - emerging-ftp.rules
  - emerging-policy.rules
  - emerging-trojan.rules
classification-file: /etc/suricata/classification.config
reference-config-file: /etc/suricata/reference.config
default-log-dir: /var/log/suricata/
stats:
  enabled: true
  interval: 8
outputs:
  - fast:
      enabled: true
      append: true
      filename: fast.log
  - http-log:
      enabled: false
      append: true
      filename: http.log
  - stats:
      interval: 8
      enabled: false
      filename: stats.log
  - drop:
      enabled: true
      append: true
      filename: drop.log
logging:
  default-log-level: info
  default-output-filter:
  outputs:
    - console:
        enabled: true
    - file:
        enabled: true
        filename: /var/log/suricata/suricata.log
    - syslog:
        format: "'[%i] <%d> -- '"
        enabled: true
        facility: local5
af-packet:
  - interface: eth0
    cluster-id: 99
    cluster-type: cluster_flow
    defrag: true
  - interface: default
pcap:
  - interface: eth0
  - interface: default
pcap-file:
  checksum-checks: auto
app-layer:
  protocols:
    tls:
      enabled: true
      detection-ports:
        dp: 443
    dcerpc:
      enabled: true
    ftp:
      enabled: true
    ssh:
      enabled: true
    smtp:
      enabled: true
      mime:
        decode-mime: true
        decode-base64: true
        decode-quoted-printable: true
        header-value-depth: 2000
        extract-urls: true
        body-md5: false
      inspected-tracker:
        content-limit: 100000
        content-inspect-min-size: 32768
        content-inspect-window: 4096
    imap:
      enabled: detection-only
    msn:
      enabled: detection-only
    smb:
      enabled: true
      detection-ports:
        dp: 139
    dns:
      tcp:
        enabled: true
        detection-ports:
          dp: 53
      udp:
        enabled: true
        detection-ports:
          dp: 53
    http:
      enabled: true
      libhtp:
        default-config:
          personality: IDS
          request-body-limit: 100kb
          response-body-limit: 100kb
          request-body-minimal-inspect-size: 32kb
          request-body-inspect-window: 4kb
          response-body-minimal-inspect-size: 40kb
          response-body-inspect-window: 16kb
          response-body-decompress-layer-limit: 2
          http-body-inline: auto
          double-decode-path: false
          double-decode-query: false
        server-config:
    modbus:
      enabled: false
      detection-ports:
        dp: 502
      stream-depth: 0
    dnp3:
      enabled: false
      detection-ports:
        dp: 20000
    enip:
      enabled: false
      detection-ports:
        dp: 44818
        sp: 44818
asn1-max-frames: 256
coredump:
  max-dump: unlimited
host-mode: auto
unix-command:
  enabled: false
  filename: /var/run/suricata-command.socket
legacy:
  uricontent: enabled
engine-analysis:
  rules-fast-pattern: true
  rules: true
pcre:
  match-limit: 3500
  match-limit-recursion: 1500
host-os-policy:
  windows: [0.0.0.0/0]
  bsd: []
  bsd-right: []
  old-linux: []
  linux:
    - 10.0.0.0/8
  old-solaris: []
  solaris: []
  hpux10: []
  hpux11: []
  irix: []
  macos: []
  vista: []
  windows2k3: []
defrag:
  memcap: 32mb
  hash-size: 65536
  trackers: 65535 # number of defragmented flows to follow
  max-frags: 65535 # number of fragments to keep (higher than trackers)
  prealloc: true
  timeout: 60
flow:
  memcap: 128mb
  hash-size: 65536
  prealloc: 10000
  emergency-recovery: 30
vlan:
  use-for-tracking: true
flow-timeouts:
  default:
    new: 30
    established: 300
    closed: 0
    bypassed: 100
    emergency-new: 10
    emergency-established: 100
    emergency-closed: 0
    emergency-bypassed: 50
  tcp:
    new: 60
    established: 600
    closed: 60
    bypassed: 100
    emergency-new: 5
    emergency-established: 100
    emergency-closed: 10
    emergency-bypassed: 50
  udp:
    new: 30
    established: 300
    bypassed: 100
    emergency-new: 10
    emergency-established: 100
    emergency-bypassed: 50
  icmp:
    new: 30
    established: 300
    bypassed: 100
    emergency-new: 10
    emergency-established: 100
    emergency-bypassed: 50
stream:
  memcap: 64mb
  checksum-validation: true # reject wrong csums
  inline: auto # auto will use inline mode in IPS mode, yes or no set it statically
  reassembly:
    memcap: 256mb
    depth: 1mb # reassemble 1mb into a stream
    toserver-chunk-size: 2560
    toclient-chunk-size: 2560
    randomize-chunk-size: true
host:
  hash-size: 4096
  prealloc: 1000
  memcap: 32mb
detect:
  profile: low
  custom-values:
    toclient-groups: 3
    toserver-groups: 25
  sgh-mpm-context: auto
  inspection-recursion-limit: 3000
  prefilter:
    default: mpm
  grouping:
  profiling:
    grouping:
      dump-to-disk: false
      include-rules: false # very verbose
      include-mpm-stats: false
rule-reload: true
mpm-algo: auto
spm-algo: auto
threading:
  set-cpu-affinity: false
  cpu-affinity:
    - management-cpu-set:
        cpu: [0] # include only these cpus in affinity settings
    - receive-cpu-set:
        cpu: [0] # include only these cpus in affinity settings
    - worker-cpu-set:
        cpu: [all]
        mode: exclusive
        prio:
          low: [0]
          medium: [1-2]
          high: [3]
          default: medium
  detect-thread-ratio: 1.0
luajit:
  states: 128
profiling:
  rules:
    enabled: true
    filename: rule_perf.log
    append: true
    sort: avgticks
    limit: 100
    json: true
  keywords:
    enabled: true
    filename: keyword_perf.log
    append: true
  rulegroups:
    enabled: true
    filename: rule_group_perf.log
    append: true
  packets:
    enabled: true
    filename: packet_stats.log
    append: true
    csv:
      enabled: false
      filename: packet_stats.csv
  locks:
    enabled: false
    filename: lock_stats.log
    append: true
  pcap-log:
    enabled: false
    filename: pcaplog_stats.log
    append: true
nfq:
nflog:
  - group: 2
    buffer-size: 18432
  - group: default
    qthreshold: 1
    qtimeout: 100
    max-size: 20000
capture:
netmap:
  - interface: eth2
  - interface: default
pfring:
  - interface: eth0
    threads: 1
    cluster-id: 99
    cluster-type: cluster_flow
  - interface: default
ipfw:
napatech:
  hba: -1
  use-all-streams: true
  streams: [1, 2, 3]
mpipe:
  load-balance: dynamic
  iqueue-packets: 2048
  inputs:
    - interface: xgbe2
    - interface: xgbe3
    - interface: xgbe4
  stack:
    size128: 0
    size256: 9
    size512: 0
    size1024: 0
    size1664: 7
    size4096: 0
    size10386: 0
    size16384: 0
cuda:
  mpm:
    data-buffer-size-min-limit: 0
    data-buffer-size-max-limit: 1500
    cudabuffer-buffer-size: 500mb
    gpu-transfer-size: 50mb
    batching-timeout: 2000
    device-id: 0
    cuda-streams: 2
run-as:
  group: suricata
  user: suricata
threshold-file: /etc/suricata/threshold.config
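One other thing I may try: flip the unix-command block above to enabled (needs one restart), so the next time it wedges I can query the running engine with suricatasc instead of restarting (a sketch; suricatasc ships with Suricata and dump-counters is a built-in socket command):

unix-command:
  enabled: true
  filename: /var/run/suricata-command.socket

$ suricatasc -c dump-counters /var/run/suricata-command.socket | grep -iE 'memcap|drop'

That should show whether any of the memcaps (flow: 128mb, stream: 64mb, reassembly: 256mb above) are being hit when traffic stops passing.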