diff --git a/modules/services/ollama.nix b/modules/services/ollama.nix
index 3d764e963..850e04b3b 100644
--- a/modules/services/ollama.nix
+++ b/modules/services/ollama.nix
@@ -24,6 +24,7 @@ let
     codestral-22b
     # deepseek-coder-7b # subpar to deepseek-coder-v2 in nearly every way
     deepseek-coder-v2-16b # GREAT balance between speed and code quality. code is superior to qwen2_5 in some ways, and inferior in others
+    deepseek-coder-v2-16b-lite-instruct-q5_1 # higher-res version of default 16b (so, better answers?)
     # falcon2-11b # code examples are lacking
     # gemma2-9b # fast, but not great for code
     # glm4-9b # it generates invalid code
@@ -40,6 +41,7 @@ let
     # phi3_5-3b # generates invalid code
     # qwen2_5-7b # notably less quality than 32b (i.e. generates invalid code)
     qwen2_5-14b # *almost* same quality to 32b variant, but faster
+    qwen2_5-32b-instruct-q2_K # lower-res version of default 32b (so, faster?)
     qwen2_5-32b # generates 3~5 words/sec, but notably more accurate than coder-7b
     # qwen2_5-coder-7b # fast, and concise, but generates invalid code
     # solar-pro-22b # generates invalid code
diff --git a/pkgs/by-name/ollamaPackages/deepseek-coder-v2-16b-lite-instruct-q5_1.nix b/pkgs/by-name/ollamaPackages/deepseek-coder-v2-16b-lite-instruct-q5_1.nix
new file mode 100644
index 000000000..cd19d4130
--- /dev/null
+++ b/pkgs/by-name/ollamaPackages/deepseek-coder-v2-16b-lite-instruct-q5_1.nix
@@ -0,0 +1,11 @@
+#
+# - normal 16b is 8.9 GB; 16b-lite-instruct-q5_1 is 12 GB
+{ mkOllamaModel }: mkOllamaModel {
+  modelName = "deepseek-coder-v2";
+  variant = "16b-lite-instruct-q5_1";
+  manifestHash = "sha256-8U8LWP7e8jUMjjeooHn9tT/GX9UXIfIJrzGAB8aLRmQ=";
+  modelBlob = "7d9a741801702d521029cf790c40f9b473e9ecfb5fd65b6eb6b03eb07f8e4ff4";
+  modelBlobHash = "sha256-fZp0GAFwLVIQKc95DED5tHPp7Ptf1ltutrA+sH+OT/Q=";
+  paramsBlob = "19f2fb9e8bc65a143f47903ec07dce010fd2873f994b900ea735a4b5022e968d";
+  paramsBlobHash = "sha256-GfL7novGWhQ/R5A+wH3OAQ/Shz+ZS5AOpzWktQIulo0=";
+}
diff --git a/pkgs/by-name/ollamaPackages/mkOllamaModel.nix b/pkgs/by-name/ollamaPackages/mkOllamaModel.nix
index 5428ddd74..a0102bf72 100644
--- a/pkgs/by-name/ollamaPackages/mkOllamaModel.nix
+++ b/pkgs/by-name/ollamaPackages/mkOllamaModel.nix
@@ -10,11 +10,11 @@
   manifestHash ? "",
   # grab the *Blob from the manifest (trim the `sha256:` prefix).
   # the manifest can be acquired by providing just the above parameters and building this package, then viewing the output
-  modelBlob ? null,
+  modelBlob ? "",
   modelBlobHash ? "",
-  paramsBlob ? null,
+  paramsBlob ? "",
   paramsBlobHash ? "",
-  systemBlob ? null,
+  systemBlob ? "",
   systemBlobHash ? "",
 }:
 stdenv.mkDerivation {
@@ -24,17 +24,17 @@ stdenv.mkDerivation {
       url = "https://registry.ollama.ai/v2/library/${modelName}/manifests/${variant}";
       hash = manifestHash;
     })
-  ] ++ lib.optionals (modelBlob != null) [
+  ] ++ lib.optionals (modelBlob != "") [
     (fetchurl {
       url = "https://registry.ollama.ai/v2/llama/${modelName}:${variant}/blobs/sha256-${modelBlob}";
       hash = modelBlobHash;
     })
-  ] ++ lib.optionals (paramsBlob != null) [
+  ] ++ lib.optionals (paramsBlob != "") [
     (fetchurl {
       url = "https://registry.ollama.ai/v2/llama/${modelName}:${variant}/blobs/sha256-${paramsBlob}";
       hash = paramsBlobHash;
     })
-  ] ++ lib.optionals (systemBlob != null) [
+  ] ++ lib.optionals (systemBlob != "") [
     (fetchurl {
       url = "https://registry.ollama.ai/v2/llama/${modelName}:${variant}/blobs/sha256-${systemBlob}";
       hash = systemBlobHash;
diff --git a/pkgs/by-name/ollamaPackages/package.nix b/pkgs/by-name/ollamaPackages/package.nix
index cb4e0fd61..0c39b6ae2 100644
--- a/pkgs/by-name/ollamaPackages/package.nix
+++ b/pkgs/by-name/ollamaPackages/package.nix
@@ -19,6 +19,7 @@ lib.recurseIntoAttrs (lib.makeScope newScope (self: with self; {
   codestral-22b = callPackage ./codestral-22b.nix { };
   deepseek-coder-7b = callPackage ./deepseek-coder-7b.nix { };
   deepseek-coder-v2-16b = callPackage ./deepseek-coder-v2-16b.nix { };
+  deepseek-coder-v2-16b-lite-instruct-q5_1 = callPackage ./deepseek-coder-v2-16b-lite-instruct-q5_1.nix { };
   falcon2-11b = callPackage ./falcon2-11b.nix { };
   gemma2-9b = callPackage ./gemma2-9b.nix { };
   glm4-9b = callPackage ./glm4-9b.nix { };
@@ -36,6 +37,7 @@ lib.recurseIntoAttrs (lib.makeScope newScope (self: with self; {
   qwen2_5-7b = callPackage ./qwen2_5-7b.nix { };
   qwen2_5-14b = callPackage ./qwen2_5-14b.nix { };
   qwen2_5-32b = callPackage ./qwen2_5-32b.nix { };
+  qwen2_5-32b-instruct-q2_K = callPackage ./qwen2_5-32b-instruct-q2_K.nix { };
   qwen2_5-coder-7b = callPackage ./qwen2_5-coder-7b.nix { };
   solar-pro-22b = callPackage ./solar-pro-22b.nix { };
   starcoder2-15b-instruct = callPackage ./starcoder2-15b-instruct.nix { };
diff --git a/pkgs/by-name/ollamaPackages/qwen2_5-32b-instruct-q2_K.nix b/pkgs/by-name/ollamaPackages/qwen2_5-32b-instruct-q2_K.nix
new file mode 100644
index 000000000..dfced7d81
--- /dev/null
+++ b/pkgs/by-name/ollamaPackages/qwen2_5-32b-instruct-q2_K.nix
@@ -0,0 +1,12 @@
+#
+# - 32b-instruct-q2_K is a variant of 32b that's 12 GB instead of 20 GB
+#   14b-instruct is 9 GB, for comparison
+{ mkOllamaModel }: mkOllamaModel {
+  modelName = "qwen2.5";
+  variant = "32b-instruct-q2_K";
+  manifestHash = "sha256-mYGFobYa4TEkAb+xBICAnmieSgYsTT+FbQNxGSWReNI=";
+  modelBlob = "ed7a653589dcc230070e02058681ad28ba202ddbd6b79e3dcbf23e436d5e8f00";
+  modelBlobHash = "sha256-7XplNYncwjAHDgIFhoGtKLogLdvWt549y/I+Q21ejwA=";
+  systemBlob = "66b9ea09bd5b7099cbb4fc820f31b575c0366fa439b08245566692c6784e281e";
+  systemBlobHash = "sha256-ZrnqCb1bcJnLtPyCDzG1dcA2b6Q5sIJFVmaSxnhOKB4=";
+}