From 6300f478e9d9a9a0f0d66004e50e67a107f9cea3 Mon Sep 17 00:00:00 2001 From: Leona Maroni Date: Thu, 1 Feb 2024 16:53:15 +0100 Subject: [PATCH] nixos/paperless: use nltk_data package as NLTK data source nixos --- .../manual/release-notes/rl-2405.section.md | 2 ++ nixos/modules/services/misc/paperless.nix | 24 ++++--------------- 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/nixos/doc/manual/release-notes/rl-2405.section.md b/nixos/doc/manual/release-notes/rl-2405.section.md index b6af5fc6c3cc..32e2fb569237 100644 --- a/nixos/doc/manual/release-notes/rl-2405.section.md +++ b/nixos/doc/manual/release-notes/rl-2405.section.md @@ -268,6 +268,8 @@ The pre-existing [services.ankisyncd](#opt-services.ankisyncd.enable) has been m - Custom themes and other assets that were previously stored in `custom/public/*` now belong in `custom/public/assets/*` - New instances of Gitea using MySQL now ignore the `[database].CHARSET` config option and always use the `utf8mb4` charset, existing instances should migrate via the `gitea doctor convert` CLI command. +- The `services.paperless` module no longer uses the previously downloaded NLTK data stored in `/var/cache/paperless/nltk`. This directory can be removed. + - The `hardware.pulseaudio` module now sets permission of pulse user home directory to 755 when running in "systemWide" mode. It fixes [issue 114399](https://github.com/NixOS/nixpkgs/issues/114399). - The `btrbk` module now automatically selects and provides required compression diff --git a/nixos/modules/services/misc/paperless.nix b/nixos/modules/services/misc/paperless.nix index ca34a327dbdf..5193cabe63ac 100644 --- a/nixos/modules/services/misc/paperless.nix +++ b/nixos/modules/services/misc/paperless.nix @@ -6,7 +6,6 @@ let pkg = cfg.package; defaultUser = "paperless"; - nltkDir = "/var/cache/paperless/nltk"; defaultFont = "${pkgs.liberation_ttf}/share/fonts/truetype/LiberationSerif-Regular.ttf"; # Don't start a redis instance if the user sets a custom redis connection @@ -17,13 +16,17 @@ let PAPERLESS_DATA_DIR = cfg.dataDir; PAPERLESS_MEDIA_ROOT = cfg.mediaDir; PAPERLESS_CONSUMPTION_DIR = cfg.consumptionDir; - PAPERLESS_NLTK_DIR = nltkDir; PAPERLESS_THUMBNAIL_FONT_NAME = defaultFont; GUNICORN_CMD_ARGS = "--bind=${cfg.address}:${toString cfg.port}"; } // optionalAttrs (config.time.timeZone != null) { PAPERLESS_TIME_ZONE = config.time.timeZone; } // optionalAttrs enableRedis { PAPERLESS_REDIS = "unix://${redisServer.unixSocket}"; + } // optionalAttrs (cfg.settings.PAPERLESS_ENABLE_NLTK or true) { + PAPERLESS_NLTK_DIR = pkgs.symlinkJoin { + name = "paperless_ngx_nltk_data"; + paths = pkg.nltkData; + }; } // (lib.mapAttrs (_: s: if (lib.isAttrs s || lib.isList s) then builtins.toJSON s else if lib.isBool s then lib.boolToString s @@ -292,23 +295,6 @@ in }; }; - # Download NLTK corpus data - systemd.services.paperless-download-nltk-data = { - wantedBy = [ "paperless-scheduler.service" ]; - before = [ "paperless-scheduler.service" ]; - after = [ "network-online.target" ]; - wants = [ "network-online.target" ]; - serviceConfig = defaultServiceConfig // { - User = cfg.user; - Type = "oneshot"; - # Enable internet access - PrivateNetwork = false; - ExecStart = let pythonWithNltk = pkg.python.withPackages (ps: [ ps.nltk ]); in '' - ${pythonWithNltk}/bin/python -m nltk.downloader -d '${nltkDir}' punkt snowball_data stopwords - ''; - }; - }; - systemd.services.paperless-consumer = { description = "Paperless document consumer"; # Bind to `paperless-scheduler` so that the consumer never runs