Skip to content

Commit

Permalink
added very basic bot flagging
Browse files Browse the repository at this point in the history
  • Loading branch information
ansibleguy committed May 17, 2024
1 parent fc4184b commit 6355e20
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 1 deletion.
8 changes: 8 additions & 0 deletions defaults/main/1_main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,10 @@ defaults_frontend:
block_script_bots: false
block_bad_crawler_bots: false

flag_bots: false
flag_bots_lines: [] # additional checks you want to append; e.g. you could check whether a cookie set by JS exists
# prefix each of your conditions with 'http-request set-var(txn.bot) int(1) if !{ var(txn.bot) -m found } '

log:
user_agent: true

Expand Down Expand Up @@ -151,6 +155,10 @@ defaults_backend:
block_script_bots: false
block_bad_crawler_bots: false

flag_bots: false
flag_bots_lines: [] # additional checks you want to append; e.g. you could check whether a cookie set by JS exists
# prefix each of your conditions with 'http-request set-var(txn.bot) int(1) if !{ var(txn.bot) -m found } '

# for health-checks see: https://www.haproxy.com/blog/how-to-enable-health-checks-in-haproxy
# more complex ones should be implemented by supplying the raw config-lines
check: true
Expand Down
24 changes: 24 additions & 0 deletions defaults/main/2_waf.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,5 +58,29 @@ defaults_waf:
- 'tiny-bot'
- 'fidget-spinner-bot'
- 'download'
- 'scan'
- 'nmap'
- 'Metasploit'
- 'ImageVacuum'
# python
- 'scrapy'
# golang
- 'zgrab'

any:
- 'bot'
- 'spider'
- 'photon'
- 'adsdefender'
- 'crawler'
- 'robot'
- 'image'
- 'proxy'
- 'download'
- 'scan'
- 'Chrome-Lighthouse'
- 'whatsapp'
- 'fetcher'
- 'office'
- 'facebook'
- 'feed'
2 changes: 1 addition & 1 deletion molecule/default/converge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@
# deny_dangerous_methods: true
block_script_bots: true
block_bad_crawler_bots: true

flag_bots: true

routes:
be_test:
Expand Down
36 changes: 36 additions & 0 deletions templates/etc/haproxy/conf.d/inc/security.j2
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,39 @@
http-request deny status {{ HAPROXY_WAF.block_code }} {{ BLOCK_ERRORFILE }} if { req.fhdr(User-Agent) -m sub -i {{ HAPROXY_WAF.user_agents.bad_crawlers.sub | ensure_list | join(' ') }} }
{% endif %}
{% endif %}
{% if cnf.security.flag_bots | bool %}
{# Bot flagging: sets the transaction variable 'txn.bot' to 1 as soon as one of the heuristics below matches. #}
{# Every rule is guarded by '!{ var(txn.bot) -m found }', so once the variable is set no later rule overwrites it. #}
{# If no rule matched, the variable is set to 0 at the end and its value is captured. #}
# FLAG BOTS
{# Guarded like the other user-agent lists: an empty list would otherwise render an ACL without any pattern. #}
{% if HAPROXY_WAF.user_agents.any | length > 0 %}
## flag bots by common user-agent substrings
http-request set-var(txn.bot) int(1) if !{ var(txn.bot) -m found } { req.fhdr(User-Agent) -m sub -i {{ HAPROXY_WAF.user_agents.any | ensure_list | join(' ') }} }
{% endif %}

{# The script-bot and bad-crawler lists are only used for flagging when the matching block_* option is disabled; #}
{# presumably such requests are already denied by the blocking rules earlier in this template when it is enabled. #}
{% if not cnf.security.block_script_bots | bool %}
## flag well-known script-bots
{% if HAPROXY_WAF.user_agents.script.full | length > 0 %}
http-request set-var(txn.bot) int(1) if !{ var(txn.bot) -m found } { req.fhdr(User-Agent) -m str -i {{ HAPROXY_WAF.user_agents.script.full | ensure_list | join(' ') }} }
{% endif %}
{% if HAPROXY_WAF.user_agents.script.sub | length > 0 %}
http-request set-var(txn.bot) int(1) if !{ var(txn.bot) -m found } { req.fhdr(User-Agent) -m sub -i {{ HAPROXY_WAF.user_agents.script.sub | ensure_list | join(' ') }} }
{% endif %}
{% endif %}
{% if not cnf.security.block_bad_crawler_bots | bool %}
## flag well-known bad-crawler-bots
{% if HAPROXY_WAF.user_agents.bad_crawlers.full | length > 0 %}
http-request set-var(txn.bot) int(1) if !{ var(txn.bot) -m found } { req.fhdr(User-Agent) -m str -i {{ HAPROXY_WAF.user_agents.bad_crawlers.full | ensure_list | join(' ') }} }
{% endif %}
{% if HAPROXY_WAF.user_agents.bad_crawlers.sub | length > 0 %}
http-request set-var(txn.bot) int(1) if !{ var(txn.bot) -m found } { req.fhdr(User-Agent) -m sub -i {{ HAPROXY_WAF.user_agents.bad_crawlers.sub | ensure_list | join(' ') }} }
{% endif %}
{% endif %}
## unusual if action has no referrer
http-request set-var(txn.bot) int(1) if !{ var(txn.bot) -m found } !{ method GET HEAD } !{ req.hdr(Referer) -m found }
## browsers set this one usually
http-request set-var(txn.bot) int(1) if !{ var(txn.bot) -m found } !{ req.hdr(Accept-Language) -m found }

{# User-supplied raw rules; 'ensure_list' is applied as for the user-agent lists so that a single string value #}
{# is presumably emitted as one line instead of being iterated character by character - verify filter semantics. #}
{% for line in cnf.security.flag_bots_lines | ensure_list %}
{{ line }}
{% endfor %}

{# Default to 0 so the captured value is always defined. #}
http-request set-var(txn.bot) int(0) if !{ var(txn.bot) -m found }
{# NOTE(review): 'capture ... len' allocates a new capture slot; if this template is also rendered into backend #}
{# sections, confirm HAProxy accepts it there or switch to 'id' with a declared capture slot. #}
http-request capture var(txn.bot) len 1

{% endif %}

0 comments on commit 6355e20

Please sign in to comment.