servo: gitea: place behind anubis

somebody was aggressively crawling it again, even with robots.txt :(
This commit is contained in:
2025-07-24 22:42:24 +00:00
parent 00acccb9c9
commit 97dfb58bbb
3 changed files with 25 additions and 9 deletions

View File

@@ -1,6 +1,6 @@
# config options: <https://docs.gitea.io/en-us/administration/config-cheat-sheet/> # config options: <https://docs.gitea.io/en-us/administration/config-cheat-sheet/>
# TODO: service shouldn't run as `git` user, but as `gitea` # TODO: service shouldn't run as `git` user, but as `gitea`
{ pkgs, lib, ... }: { config, pkgs, lib, ... }:
{ {
sane.persist.sys.byStore.private = [ sane.persist.sys.byStore.private = [
@@ -122,9 +122,17 @@
# services.openssh.settings.UsePAM = true; #< required for `git` user to authenticate # services.openssh.settings.UsePAM = true; #< required for `git` user to authenticate
services.anubis.instances."git.uninsane.org".settings.TARGET = "http://127.0.0.1:3000";
# hosted git (web view and for `git <cmd>` use) # hosted git (web view and for `git <cmd>` use)
# TODO: enable publog? # TODO: enable publog?
services.nginx.virtualHosts."git.uninsane.org" = { services.nginx.virtualHosts."git.uninsane.org" = let
# XXX(2025-07-24): gitea's still being crawled, even with robots.txt.
# the load is lower than when Anthropic first started crawling, but it's still pretty high (like 600% CPU).
# place behind anubis to prevent AI crawlers from hogging my CPU (gitea is slow to render pages).
proxyPass = "http://unix:${config.services.anubis.instances."git.uninsane.org".settings.BIND}";
# proxyPass = "http://127.0.0.1:3000";
in {
forceSSL = true; # gitea complains if served over a different protocol than its config file says forceSSL = true; # gitea complains if served over a different protocol than its config file says
enableACME = true; enableACME = true;
# inherit kTLS; # inherit kTLS;
@@ -133,16 +141,18 @@
''; '';
locations."/" = { locations."/" = {
proxyPass = "http://127.0.0.1:3000"; inherit proxyPass;
recommendedProxySettings = true;
}; };
# fuck you @anthropic # fuck you @anthropic
locations."= /robots.txt".extraConfig = '' # locations."= /robots.txt".extraConfig = ''
return 200 "User-agent: *\nDisallow: /\n"; # return 200 "User-agent: *\nDisallow: /\n";
''; # '';
# gitea serves all `raw` files as content-type: plain, but i'd like to serve them as their actual content type. # gitea serves all `raw` files as content-type: plain, but i'd like to serve them as their actual content type.
# or at least, enough to make specific pages viewable (serving unoriginal content as arbitrary content type is dangerous). # or at least, enough to make specific pages viewable (serving unoriginal content as arbitrary content type is dangerous).
locations."~ ^/colin/phone-case-cq/raw/.*.html" = { locations."~ ^/colin/phone-case-cq/raw/.*.html" = {
proxyPass = "http://127.0.0.1:3000"; inherit proxyPass;
recommendedProxySettings = true;
extraConfig = '' extraConfig = ''
proxy_hide_header Content-Type; proxy_hide_header Content-Type;
default_type text/html; default_type text/html;
@@ -150,7 +160,8 @@
''; '';
}; };
locations."~ ^/colin/phone-case-cq/raw/.*.js" = { locations."~ ^/colin/phone-case-cq/raw/.*.js" = {
proxyPass = "http://127.0.0.1:3000"; inherit proxyPass;
recommendedProxySettings = true;
extraConfig = '' extraConfig = ''
proxy_hide_header Content-Type; proxy_hide_header Content-Type;
default_type text/html; default_type text/html;

View File

@@ -22,12 +22,15 @@
}; };
services.nginx.enable = true; services.nginx.enable = true;
users.users.nginx.extraGroups = [ "anubis" ];
# nginxStable is one release behind nginxMainline. # nginxStable is one release behind nginxMainline.
# nginx itself recommends running mainline; nixos defaults to stable. # nginx itself recommends running mainline; nixos defaults to stable.
# services.nginx.package = pkgs.nginxMainline; # services.nginx.package = pkgs.nginxMainline;
# XXX(2024-07-31): nixos defaults to zlib-ng -- supposedly more performant, but spams log with # XXX(2024-07-31): nixos defaults to zlib-ng -- supposedly more performant, but spams log with
# "gzip filter failed to use preallocated memory: ..." # "gzip filter failed to use preallocated memory: ..."
services.nginx.package = pkgs.nginxMainline.override { zlib = pkgs.zlib; }; # XXX(2025-07-24): "gzip filter" spam is gone => use default nginx package
# services.nginx.package = pkgs.nginxMainline.override { zlib = pkgs.zlib; };
services.nginx.appendConfig = '' services.nginx.appendConfig = ''
# use 1 process per core. # use 1 process per core.
# may want to increase worker_connections too, but `ulimit -n` must be increased first. # may want to increase worker_connections too, but `ulimit -n` must be increased first.

View File

@@ -68,6 +68,8 @@
sane.ids.ollama.gid = 2422; sane.ids.ollama.gid = 2422;
sane.ids.bitmagnet.uid = 2423; sane.ids.bitmagnet.uid = 2423;
sane.ids.bitmagnet.gid = 2423; sane.ids.bitmagnet.gid = 2423;
sane.ids.anubis.uid = 2424;
sane.ids.anubis.gid = 2424;
sane.ids.shelvacu.uid = 5431; sane.ids.shelvacu.uid = 5431;
sane.ids.colin.uid = 1000; sane.ids.colin.uid = 1000;