servo: gitea: place behind anubis

somebody was aggressively crawling it again, even with robots.txt :(
This commit is contained in:
2025-07-24 22:42:24 +00:00
parent 00acccb9c9
commit 97dfb58bbb
3 changed files with 25 additions and 9 deletions

View File

@@ -1,6 +1,6 @@
# config options: <https://docs.gitea.io/en-us/administration/config-cheat-sheet/>
# TODO: service shouldn't run as `git` user, but as `gitea`
{ pkgs, lib, ... }:
{ config, pkgs, lib, ... }:
{
sane.persist.sys.byStore.private = [
@@ -122,9 +122,17 @@
# services.openssh.settings.UsePAM = true; #< required for `git` user to authenticate
services.anubis.instances."git.uninsane.org".settings.TARGET = "http://127.0.0.1:3000";
# hosted git (web view and for `git <cmd>` use)
# TODO: enable publog?
services.nginx.virtualHosts."git.uninsane.org" = {
services.nginx.virtualHosts."git.uninsane.org" = let
# XXX(2025-07-24): gitea's still being crawled, even with robots.txt.
# the load is less than when Anthropic first started, but it's still pretty high (like 600%).
# place behind anubis to prevent AI crawlers from hogging my CPU (gitea is slow to render pages).
proxyPass = "http://unix:${config.services.anubis.instances."git.uninsane.org".settings.BIND}";
# proxyPass = "http://127.0.0.1:3000";
in {
forceSSL = true; # gitea complains if served over a different protocol than its config file says
enableACME = true;
# inherit kTLS;
@@ -133,16 +141,18 @@
'';
locations."/" = {
proxyPass = "http://127.0.0.1:3000";
inherit proxyPass;
recommendedProxySettings = true;
};
# fuck you @anthropic
locations."= /robots.txt".extraConfig = ''
return 200 "User-agent: *\nDisallow: /\n";
'';
# locations."= /robots.txt".extraConfig = ''
# return 200 "User-agent: *\nDisallow: /\n";
# '';
# gitea serves all `raw` files as content-type: plain, but i'd like to serve them as their actual content type.
# or at least, enough to make specific pages viewable (serving unoriginal content as arbitrary content type is dangerous).
locations."~ ^/colin/phone-case-cq/raw/.*.html" = {
proxyPass = "http://127.0.0.1:3000";
inherit proxyPass;
recommendedProxySettings = true;
extraConfig = ''
proxy_hide_header Content-Type;
default_type text/html;
@@ -150,7 +160,8 @@
'';
};
locations."~ ^/colin/phone-case-cq/raw/.*.js" = {
proxyPass = "http://127.0.0.1:3000";
inherit proxyPass;
recommendedProxySettings = true;
extraConfig = ''
proxy_hide_header Content-Type;
default_type text/html;

View File

@@ -22,12 +22,15 @@
};
services.nginx.enable = true;
users.users.nginx.extraGroups = [ "anubis" ];
# nginxStable is one release behind nginxMainline.
# nginx itself recommends running mainline; nixos defaults to stable.
# services.nginx.package = pkgs.nginxMainline;
# XXX(2024-07-31): nixos defaults to zlib-ng -- supposedly more performant, but spams log with
# "gzip filter failed to use preallocated memory: ..."
services.nginx.package = pkgs.nginxMainline.override { zlib = pkgs.zlib; };
# XXX(2025-07-24): "gzip filter" spam is gone => use default nginx package
# services.nginx.package = pkgs.nginxMainline.override { zlib = pkgs.zlib; };
services.nginx.appendConfig = ''
# use 1 process per core.
# may want to increase worker_connections too, but `ulimit -n` must be increased first.

View File

@@ -68,6 +68,8 @@
sane.ids.ollama.gid = 2422;
sane.ids.bitmagnet.uid = 2423;
sane.ids.bitmagnet.gid = 2423;
sane.ids.anubis.uid = 2424;
sane.ids.anubis.gid = 2424;
sane.ids.shelvacu.uid = 5431;
sane.ids.colin.uid = 1000;