nginx: uninsane.org: block archive.org, google bot from select pages
This commit is contained in:
@@ -83,6 +83,28 @@ in
|
|||||||
# unversioned files
|
# unversioned files
|
||||||
locations."@fallback" = {
|
locations."@fallback" = {
|
||||||
root = "/var/www/sites/uninsane.org";
|
root = "/var/www/sites/uninsane.org";
|
||||||
|
extraConfig = ''
|
||||||
|
# instruct Google to not index these pages.
|
||||||
|
# see: <https://developers.google.com/search/docs/crawling-indexing/robots-meta-tag#xrobotstag>
|
||||||
|
add_header X-Robots-Tag 'none, noindex, nofollow';
|
||||||
|
|
||||||
|
# best-effort attempt to block archive.org from archiving these pages.
|
||||||
|
# reply with 403: Forbidden
|
||||||
|
# User Agent is *probably* "archive.org_bot"; it may previously have been "ia_archiver"
|
||||||
|
# source: <https://archive.org/details/archive.org_bot>
|
||||||
|
# additional UAs: <https://github.com/mitchellkrogza/nginx-ultimate-bad-bot-blocker>
|
||||||
|
#
|
||||||
|
# validate with: `curl -H 'User-Agent: "bot;archive.org_bot;like: something else"' -v https://uninsane.org/dne`
|
||||||
|
if ($http_user_agent ~* "(?:\b)archive.org_bot(?:\b)") {
|
||||||
|
return 403;
|
||||||
|
}
|
||||||
|
if ($http_user_agent ~* "(?:\b)archive.org(?:\b)") {
|
||||||
|
return 403;
|
||||||
|
}
|
||||||
|
if ($http_user_agent ~* "(?:\b)ia_archiver(?:\b)") {
|
||||||
|
return 403;
|
||||||
|
}
|
||||||
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
# uninsane.org/share/foo => /var/www/sites/uninsane.org/share/foo.
|
# uninsane.org/share/foo => /var/www/sites/uninsane.org/share/foo.
|
||||||
|
Reference in New Issue
Block a user