diff --git a/pkgs/by-name/lo/local-ai/module.nix b/pkgs/by-name/lo/local-ai/module.nix
index d7b70048121f..70f486b43b1a 100644
--- a/pkgs/by-name/lo/local-ai/module.nix
+++ b/pkgs/by-name/lo/local-ai/module.nix
@@ -28,25 +28,33 @@ in
       type = types.either types.package types.str;
       default = "models";
     };
+
+    parallelRequests = mkOption {
+      type = types.int;
+      default = 1;
+    };
+
+    logLevel = mkOption {
+      type = types.enum [ "error" "warn" "info" "debug" "trace" ];
+      default = "warn";
+    };
   };
 
   config = lib.mkIf cfg.enable {
     systemd.services.local-ai = {
       wantedBy = [ "multi-user.target" ];
+      environment.LLAMACPP_PARALLEL = toString cfg.parallelRequests;
       serviceConfig = {
         DynamicUser = true;
         ExecStart = lib.escapeShellArgs ([
           "${cfg.package}/bin/local-ai"
-          "--debug"
-          "--address"
-          ":${toString cfg.port}"
-          "--threads"
-          (toString cfg.threads)
-          "--localai-config-dir"
-          "."
-          "--models-path"
-          (toString cfg.models)
+          "--address=:${toString cfg.port}"
+          "--threads=${toString cfg.threads}"
+          "--localai-config-dir=."
+          "--models-path=${cfg.models}"
+          "--log-level=${cfg.logLevel}"
         ]
+        ++ lib.optional (cfg.parallelRequests > 1) "--parallel-requests"
         ++ cfg.extraArgs);
         RuntimeDirectory = "local-ai";
         WorkingDirectory = "%t/local-ai";
diff --git a/pkgs/by-name/lo/local-ai/package.nix b/pkgs/by-name/lo/local-ai/package.nix
index 061122c3f848..c40cb70cee02 100644
--- a/pkgs/by-name/lo/local-ai/package.nix
+++ b/pkgs/by-name/lo/local-ai/package.nix
@@ -100,8 +100,8 @@ let
     src = fetchFromGitHub {
       owner = "ggerganov";
       repo = "llama.cpp";
-      rev = "784e11dea1f5ce9638851b2b0dddb107e2a609c8";
-      hash = "sha256-yAQAUo5J+a6O2kTqhFL1UH0tANxpQn3JhAd3MByaC6I=";
+      rev = "6ecf3189e00a1e8e737a78b6d10e1d7006e050a2";
+      hash = "sha256-JS287UdCzj6Es134cbhr8y/AoejMEux0w++/pZ5NejY=";
       fetchSubmodules = true;
     };
     postPatch = prev.postPatch + ''
@@ -254,8 +254,8 @@ let
     src = fetchFromGitHub {
       owner = "ggerganov";
       repo = "whisper.cpp";
-      rev = "858452d58dba3acdc3431c9bced2bb8cfd9bf418";
-      hash = "sha256-2fT3RgGpBex1mF6GJsVDo4rb0F31YqxTymsXcrpQAZk=";
+      rev = "8fac6455ffeb0a0950a84e790ddb74f7290d33c4";
+      hash = "sha256-Dez/Q2vMvSmscS+BJwkgZ4QG+ebM/N8s1Okd5my0CWI=";
     };
 
     nativeBuildInputs = [ cmake pkg-config ]
@@ -305,8 +305,8 @@ let
     src = fetchFromGitHub {
       owner = "mudler";
       repo = "go-stable-diffusion";
-      rev = "362df9da29f882dbf09ade61972d16a1f53c3485";
-      hash = "sha256-A5KvMZOviPsIpPHxM8cacT+qE2x1iFJAbPsRs4sLijY=";
+      rev = "4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f";
+      hash = "sha256-KXUvMP6cDyWib4rG0RmVRm3pgrdsfKXaH3k0v5/mTe8=";
       fetchSubmodules = true;
     };
     buildFlags = [ "libstablediffusion.a" ];
@@ -342,8 +342,8 @@ let
     src = fetchFromGitHub {
       owner = "M0Rf30";
       repo = "go-tiny-dream";
-      rev = "22a12a4bc0ac5455856f28f3b771331a551a4293";
-      hash = "sha256-DAVHD6E0OKHf4C2ldoI0Mm7813DIrmWFONUhSCQPCfc=";
+      rev = "c04fa463ace9d9a6464313aa5f9cd0f953b6c057";
+      hash = "sha256-uow3vbAI4F/fTGjYOKOLqTpKq7NgGYSZhGlEhn7h6s0=";
       fetchSubmodules = true;
     };
     postUnpack = ''
@@ -373,12 +373,12 @@ let
     stdenv;
 
   pname = "local-ai";
-  version = "2.13.0";
+  version = "2.14.0";
   src = fetchFromGitHub {
     owner = "go-skynet";
     repo = "LocalAI";
     rev = "v${version}";
-    hash = "sha256-jZE8Ow9FFhnx/jvsURLYlYtSuKpE4UWBezxg/mpHs9g=";
+    hash = "sha256-wr7sTMjGofGiZZbRJ+RfgXx9TM9Adu2NBAXeB3P5Ep0=";
   };
 
   self = buildGoModule.override { stdenv = effectiveStdenv; } {
diff --git a/pkgs/by-name/lo/local-ai/tests.nix b/pkgs/by-name/lo/local-ai/tests.nix
index 7cebc6fff938..10895bce6571 100644
--- a/pkgs/by-name/lo/local-ai/tests.nix
+++ b/pkgs/by-name/lo/local-ai/tests.nix
@@ -6,6 +6,7 @@
 , writers
 , symlinkJoin
 , jq
+, prom2json
 }:
 let
   common-config = { config, ... }: {
@@ -14,6 +15,7 @@ let
       enable = true;
       package = self;
       threads = config.virtualisation.cores;
+      logLevel = "debug";
     };
   };
 
@@ -36,6 +38,10 @@ in
     ''
       machine.wait_for_open_port(${port})
       machine.succeed("curl -f http://localhost:${port}/readyz")
+
+      machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
+
+      machine.copy_from_vm("metrics.json")
     '';
   });
 
@@ -80,6 +86,10 @@ in
      machine.succeed("${jq}/bin/jq --exit-status 'debug | .data[].id == \"${model}\"' models.json")
      machine.succeed("curl -f http://localhost:${port}/embeddings --json @${writers.writeJSON "request.json" requests.request} --output embeddings.json")
      machine.succeed("${jq}/bin/jq --exit-status 'debug | .model == \"${model}\"' embeddings.json")
+
+     machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
+
+     machine.copy_from_vm("metrics.json")
    '';
  };
 
@@ -92,6 +102,7 @@ in
   # https://localai.io/advanced/#full-config-model-file-reference
   model-configs.${model} = rec {
     context_size = 8192;
+    backend = "llama-cpp";
     parameters = {
       # https://huggingface.co/lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF
       # https://ai.meta.com/blog/meta-llama-3/
@@ -157,6 +168,8 @@ in
     virtualisation.cores = 4;
     virtualisation.memorySize = 8192;
     services.local-ai.models = models;
+    # TODO: Add test case parallel requests
+    services.local-ai.parallelRequests = 2;
   };
   passthru = { inherit models requests; };
   testScript =
@@ -180,6 +193,10 @@ in
      machine.succeed("curl -f http://localhost:${port}/v1/completions --json @${writers.writeJSON "request-completions.json" requests.completions} --output completions.json")
      machine.succeed("${jq}/bin/jq --exit-status 'debug | .object ==\"text_completion\"' completions.json")
      machine.succeed("${jq}/bin/jq --exit-status '.usage.completion_tokens | debug == ${toString model-configs.${model}.parameters.max_tokens}' completions.json")
+
+     machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
+
+     machine.copy_from_vm("metrics.json")
    '';
  };
 
@@ -243,6 +260,10 @@ in
      machine.succeed("curl -f http://localhost:${port}/tts --json @${writers.writeJSON "request.json" requests.request} --output out.wav")
      machine.succeed("curl -f http://localhost:${port}/v1/audio/transcriptions --header 'Content-Type: multipart/form-data' --form file=@out.wav --form model=${model-stt} --output transcription.json")
      machine.succeed("${jq}/bin/jq --exit-status 'debug | .segments | first.text == \"${requests.request.input}\"' transcription.json")
+
+     machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
+
+     machine.copy_from_vm("metrics.json")
    '';
  };
 }
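
Usage note: the module changes above introduce two new options, parallelRequests and logLevel. Below is a minimal sketch of a NixOS configuration exercising them; the option names come from the module diff, while the concrete values are illustrative only, not defaults.

  # Hypothetical configuration fragment; values are examples chosen for illustration.
  {
    services.local-ai = {
      enable = true;
      threads = 4;            # rendered as --threads=4
      parallelRequests = 2;   # sets LLAMACPP_PARALLEL=2 and, being > 1, adds --parallel-requests
      logLevel = "debug";     # rendered as --log-level=debug; the module default is "warn"
    };
  }

Note that LLAMACPP_PARALLEL is always exported from parallelRequests, while the --parallel-requests flag is only appended when the value exceeds 1.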