From b579dac4ed3d3d059d2a701df1728bf31a05ee70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sandro=20J=C3=A4ckel?= Date: Fri, 8 Dec 2023 02:53:17 +0100 Subject: [PATCH] nixos/paperless: override enabled tesseract languages with the in paperless configured ones --- nixos/modules/services/misc/paperless.nix | 39 ++++++++++++------- .../office/paperless-ngx/default.nix | 9 +++-- 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/nixos/modules/services/misc/paperless.nix b/nixos/modules/services/misc/paperless.nix index 9301d1f68725..5c34ab608bc2 100644 --- a/nixos/modules/services/misc/paperless.nix +++ b/nixos/modules/services/misc/paperless.nix @@ -3,7 +3,6 @@ with lib; let cfg = config.services.paperless; - pkg = cfg.package; defaultUser = "paperless"; defaultFont = "${pkgs.liberation_ttf}/share/fonts/truetype/LiberationSerif-Regular.ttf"; @@ -25,7 +24,7 @@ let } // optionalAttrs (cfg.settings.PAPERLESS_ENABLE_NLTK or true) { PAPERLESS_NLTK_DIR = pkgs.symlinkJoin { name = "paperless_ngx_nltk_data"; - paths = pkg.nltkData; + paths = cfg.package.nltkData; }; } // optionalAttrs (cfg.openMPThreadingWorkaround) { OMP_NUM_THREADS = "1"; @@ -38,7 +37,7 @@ let manage = pkgs.writeShellScript "manage" '' set -o allexport # Export the following env vars ${lib.toShellVars env} - exec ${pkg}/bin/paperless-ngx "$@" + exec ${cfg.package}/bin/paperless-ngx "$@" ''; # Secure the services @@ -200,7 +199,17 @@ in description = lib.mdDoc "User under which Paperless runs."; }; - package = mkPackageOption pkgs "paperless-ngx" { }; + package = mkPackageOption pkgs "paperless-ngx" { } // { + apply = pkg: pkg.override { + tesseract5 = pkg.tesseract5.override { + # always enable detection modules + enableLanguages = if cfg.settings ? PAPERLESS_OCR_LANGUAGE then + [ "equ" "osd" ] + ++ lib.splitString "+" cfg.settings.PAPERLESS_OCR_LANGUAGE + else null; + }; + }; + }; openMPThreadingWorkaround = mkEnableOption '' a workaround for document classifier timeouts. @@ -236,7 +245,7 @@ in wants = [ "paperless-consumer.service" "paperless-web.service" "paperless-task-queue.service" ]; serviceConfig = defaultServiceConfig // { User = cfg.user; - ExecStart = "${pkg}/bin/celery --app paperless beat --loglevel INFO"; + ExecStart = "${cfg.package}/bin/celery --app paperless beat --loglevel INFO"; Restart = "on-failure"; }; environment = env; @@ -248,8 +257,8 @@ in versionFile="${cfg.dataDir}/src-version" version=$(cat "$versionFile" 2>/dev/null || echo 0) - if [[ $version != ${pkg.version} ]]; then - ${pkg}/bin/paperless-ngx migrate + if [[ $version != ${cfg.package.version} ]]; then + ${cfg.package}/bin/paperless-ngx migrate # Parse old version string format for backwards compatibility version=$(echo "$version" | grep -ohP '[^-]+$') @@ -262,10 +271,10 @@ in if versionLessThan 1.12.0; then # Reindex documents as mentioned in https://github.com/paperless-ngx/paperless-ngx/releases/tag/v1.12.1 echo "Reindexing documents, to allow searching old comments. Required after the 1.12.x upgrade." - ${pkg}/bin/paperless-ngx document_index reindex + ${cfg.package}/bin/paperless-ngx document_index reindex fi - echo ${pkg.version} > "$versionFile" + echo ${cfg.package.version} > "$versionFile" fi '' + optionalString (cfg.passwordFile != null) '' @@ -275,7 +284,7 @@ in superuserStateFile="${cfg.dataDir}/superuser-state" if [[ $(cat "$superuserStateFile" 2>/dev/null) != $superuserState ]]; then - ${pkg}/bin/paperless-ngx manage_superuser + ${cfg.package}/bin/paperless-ngx manage_superuser echo "$superuserState" > "$superuserStateFile" fi ''; @@ -288,7 +297,7 @@ in after = [ "paperless-scheduler.service" ]; serviceConfig = defaultServiceConfig // { User = cfg.user; - ExecStart = "${pkg}/bin/celery --app paperless worker --loglevel INFO"; + ExecStart = "${cfg.package}/bin/celery --app paperless worker --loglevel INFO"; Restart = "on-failure"; # The `mbind` syscall is needed for running the classifier. SystemCallFilter = defaultServiceConfig.SystemCallFilter ++ [ "mbind" ]; @@ -319,7 +328,7 @@ in after = [ "paperless-scheduler.service" ]; serviceConfig = defaultServiceConfig // { User = cfg.user; - ExecStart = "${pkg}/bin/paperless-ngx document_consumer"; + ExecStart = "${cfg.package}/bin/paperless-ngx document_consumer"; Restart = "on-failure"; }; environment = env; @@ -351,8 +360,8 @@ in echo "PAPERLESS_SECRET_KEY is empty, refusing to start." exit 1 fi - exec ${pkg.python.pkgs.gunicorn}/bin/gunicorn \ - -c ${pkg}/lib/paperless-ngx/gunicorn.conf.py paperless.asgi:application + exec ${cfg.package.python.pkgs.gunicorn}/bin/gunicorn \ + -c ${cfg.package}/lib/paperless-ngx/gunicorn.conf.py paperless.asgi:application ''; serviceConfig = defaultServiceConfig // { User = cfg.user; @@ -368,7 +377,7 @@ in CapabilityBoundingSet = [ "CAP_NET_BIND_SERVICE" ]; }; environment = env // { - PYTHONPATH = "${pkg.python.pkgs.makePythonPath pkg.propagatedBuildInputs}:${pkg}/lib/paperless-ngx/src"; + PYTHONPATH = "${cfg.package.python.pkgs.makePythonPath cfg.package.propagatedBuildInputs}:${cfg.package}/lib/paperless-ngx/src"; }; # Allow the web interface to access the private /tmp directory of the server. # This is required to support uploading files via the web interface. diff --git a/pkgs/applications/office/paperless-ngx/default.nix b/pkgs/applications/office/paperless-ngx/default.nix index 68dd3ec0f6da..554aeb588ef7 100644 --- a/pkgs/applications/office/paperless-ngx/default.nix +++ b/pkgs/applications/office/paperless-ngx/default.nix @@ -37,8 +37,11 @@ let # https://github.com/NixOS/nixpkgs/issues/298719 # https://github.com/paperless-ngx/paperless-ngx/issues/5494 python = python3.override { - packageOverrides = self: super: { - uvicorn = super.uvicorn.overridePythonAttrs (oldAttrs: { + packageOverrides = final: prev: { + # tesseract5 may be overwritten in the paperless module and we need to propagate that to make the closure reduction effective + ocrmypdf = prev.ocrmypdf.override { tesseract = tesseract5; }; + + uvicorn = prev.uvicorn.overridePythonAttrs (_: { version = "0.25.0"; src = fetchFromGitHub { owner = "encode"; @@ -245,7 +248,7 @@ python.pkgs.buildPythonApplication rec { doCheck = !stdenv.isDarwin; passthru = { - inherit python path frontend; + inherit python path frontend tesseract5; nltkData = with nltk-data; [ punkt snowball_data stopwords ]; tests = { inherit (nixosTests) paperless; }; };