nixos/paperless: use nltk_data package as NLTK data source

nixos
This commit is contained in:
Leona Maroni 2024-02-01 16:53:15 +01:00
parent 3d195bb859
commit 6300f478e9
No known key found for this signature in database
GPG Key ID: D5B08ADFC75E3605
2 changed files with 7 additions and 19 deletions

View File

@ -268,6 +268,8 @@ The pre-existing [services.ankisyncd](#opt-services.ankisyncd.enable) has been m
- Custom themes and other assets that were previously stored in `custom/public/*` now belong in `custom/public/assets/*`
- New instances of Gitea using MySQL now ignore the `[database].CHARSET` config option and always use the `utf8mb4` charset. Existing instances should migrate via the `gitea doctor convert` CLI command.
- The `services.paperless` module no longer uses the previously downloaded NLTK data stored in `/var/cache/paperless/nltk`. This directory can be removed.
- The `hardware.pulseaudio` module now sets the permissions of the pulse user's home directory to 755 when running in "systemWide" mode. This fixes [issue 114399](https://github.com/NixOS/nixpkgs/issues/114399).
- The `btrbk` module now automatically selects and provides required compression

View File

@ -6,7 +6,6 @@ let
pkg = cfg.package;
defaultUser = "paperless";
nltkDir = "/var/cache/paperless/nltk";
defaultFont = "${pkgs.liberation_ttf}/share/fonts/truetype/LiberationSerif-Regular.ttf";
# Don't start a redis instance if the user sets a custom redis connection
@ -17,13 +16,17 @@ let
PAPERLESS_DATA_DIR = cfg.dataDir;
PAPERLESS_MEDIA_ROOT = cfg.mediaDir;
PAPERLESS_CONSUMPTION_DIR = cfg.consumptionDir;
PAPERLESS_NLTK_DIR = nltkDir;
PAPERLESS_THUMBNAIL_FONT_NAME = defaultFont;
GUNICORN_CMD_ARGS = "--bind=${cfg.address}:${toString cfg.port}";
} // optionalAttrs (config.time.timeZone != null) {
PAPERLESS_TIME_ZONE = config.time.timeZone;
} // optionalAttrs enableRedis {
PAPERLESS_REDIS = "unix://${redisServer.unixSocket}";
} // optionalAttrs (cfg.settings.PAPERLESS_ENABLE_NLTK or true) {
PAPERLESS_NLTK_DIR = pkgs.symlinkJoin {
name = "paperless_ngx_nltk_data";
paths = pkg.nltkData;
};
} // (lib.mapAttrs (_: s:
if (lib.isAttrs s || lib.isList s) then builtins.toJSON s
else if lib.isBool s then lib.boolToString s
@ -292,23 +295,6 @@ in
};
};
# Download NLTK corpus data
#
# One-shot unit that runs `python -m nltk.downloader` to fetch the `punkt`,
# `snowball_data` and `stopwords` datasets into `nltkDir`.
systemd.services.paperless-download-nltk-data = {
# Pulled in by, and ordered before, paperless-scheduler.service.
wantedBy = [ "paperless-scheduler.service" ];
before = [ "paperless-scheduler.service" ];
# Ordered after network-online.target, which this unit also pulls in.
after = [ "network-online.target" ];
wants = [ "network-online.target" ];
# Attributes on the right-hand side of `//` take precedence over those
# inherited from defaultServiceConfig.
serviceConfig = defaultServiceConfig // {
User = cfg.user;
Type = "oneshot";
# Enable internet access
# NOTE(review): presumably defaultServiceConfig sets PrivateNetwork = true
# and this overrides it; confirm against its definition.
PrivateNetwork = false;
# Python interpreter from the paperless package's Python set, extended with
# the `nltk` module so that `-m nltk.downloader` is importable.
ExecStart = let pythonWithNltk = pkg.python.withPackages (ps: [ ps.nltk ]); in ''
${pythonWithNltk}/bin/python -m nltk.downloader -d '${nltkDir}' punkt snowball_data stopwords
'';
};
};
systemd.services.paperless-consumer = {
description = "Paperless document consumer";
# Bind to `paperless-scheduler` so that the consumer never runs