servo: gitea: place only the most expensive repos behind Anubis

This commit is contained in:
2025-08-16 08:15:47 +00:00
parent 9847e0171c
commit e0bb1b7c62

View File

@@ -134,8 +134,13 @@
# XXX(2025-07-24): gitea's still being crawled, even with robots.txt.
# the load is less than when Anthropic first started, but it's still pretty high (like 600%).
# place behind anubis to prevent AI crawlers from hogging my CPU (gitea is slow to render pages).
proxyPass = "http://unix:${config.services.anubis.instances."git.uninsane.org".settings.BIND}";
# proxyPass = "http://127.0.0.1:3000";
proxyPassHeavy = "http://unix:${config.services.anubis.instances."git.uninsane.org".settings.BIND}";
# but anubis breaks embeds, so only protect the expensive repos.
proxyPassLight = "http://127.0.0.1:3000";
proxyTo = proxy: root: {
proxyPass = proxy;
recommendedProxySettings = true;
};
in {
forceSSL = true; # gitea complains if served over a different protocol than its config file says
enableACME = true;
@@ -145,9 +150,20 @@
'';
locations."/" = {
inherit proxyPass;
proxyPass = proxyPassLight;
recommendedProxySettings = true;
};
# selectively proxy the heavyweight items through anubis.
# a typical interaction is:
# nginx:/colin/linux -> anubis:/colin/linux -> browser is served a loading page
# -> nginx:.within.website/x/cmd/anubis/api/pass-challenge?response=... -> anubis:.within.website/x/cmd/anubis/api/pass-challenge?response=... -> browser is forwarded to /colin/linux
# -> nginx:/colin/linux -> anubis:/colin/linux -> gitea:/colin/linux -> browser is served the actual content
locations."/.within.website/" = proxyTo proxyPassHeavy;
locations."/colin/linux" = proxyTo proxyPassHeavy;
locations."/colin/nixpkgs" = proxyTo proxyPassHeavy;
locations."/colin/opencellid-mirror" = proxyTo proxyPassHeavy;
locations."/colin/podcastindex-db-mirror" = proxyTo proxyPassHeavy;
# fuck you @anthropic
# locations."= /robots.txt".extraConfig = ''
# return 200 "User-agent: *\nDisallow: /\n";
@@ -155,7 +171,7 @@
# gitea serves all `raw` files as content-type: plain, but i'd like to serve them as their actual content type.
# or at least, enough to make specific pages viewable (serving unoriginal content as arbitrary content type is dangerous).
locations."~ ^/colin/phone-case-cq/raw/.*.html" = {
inherit proxyPass;
proxyPass = proxyPassLight;
recommendedProxySettings = true;
extraConfig = ''
proxy_hide_header Content-Type;
@@ -164,7 +180,7 @@
'';
};
locations."~ ^/colin/phone-case-cq/raw/.*.js" = {
inherit proxyPass;
proxyPass = proxyPassLight;
recommendedProxySettings = true;
extraConfig = ''
proxy_hide_header Content-Type;