ollama: ship deepseek-coder-v2-16b-lite-instruct-q5_1, qwen2_5-32b-instruct-q2_K

2024-10-13 18:56:05 +00:00
parent 7f5b262801
commit 455b05e99a
5 changed files with 33 additions and 6 deletions

View File

@@ -24,6 +24,7 @@ let
codestral-22b
# deepseek-coder-7b # inferior to deepseek-coder-v2 in nearly every way
deepseek-coder-v2-16b # GREAT balance between speed and code quality. code is superior to qwen2_5 in some ways, and inferior in others
+deepseek-coder-v2-16b-lite-instruct-q5_1 # higher-res version of default 16b (so, better answers?)
# falcon2-11b # code examples are lacking
# gemma2-9b # fast, but not great for code
# glm4-9b # it generates invalid code
@@ -40,6 +41,7 @@ let
# phi3_5-3b # generates invalid code
# qwen2_5-7b # notably less quality than 32b (i.e. generates invalid code)
qwen2_5-14b # *almost* the same quality as the 32b variant, but faster
+qwen2_5-32b-instruct-q2_K # lower-res version of default 32b (so, faster?)
qwen2_5-32b # generates 3~5 words/sec, but notably more accurate than coder-7b
# qwen2_5-coder-7b # fast, and concise, but generates invalid code
# solar-pro-22b # generates invalid code

View File: deepseek-coder-v2-16b-lite-instruct-q5_1.nix

@@ -0,0 +1,11 @@
# <https://ollama.com/library/deepseek-coder-v2>
# - normal 16b is 8.9 GB; 16b-lite-instruct-q5_1 is 12 GB
{ mkOllamaModel }: mkOllamaModel {
modelName = "deepseek-coder-v2";
variant = "16b-lite-instruct-q5_1";
manifestHash = "sha256-8U8LWP7e8jUMjjeooHn9tT/GX9UXIfIJrzGAB8aLRmQ=";
modelBlob = "7d9a741801702d521029cf790c40f9b473e9ecfb5fd65b6eb6b03eb07f8e4ff4";
modelBlobHash = "sha256-fZp0GAFwLVIQKc95DED5tHPp7Ptf1ltutrA+sH+OT/Q=";
paramsBlob = "19f2fb9e8bc65a143f47903ec07dce010fd2873f994b900ea735a4b5022e968d";
paramsBlobHash = "sha256-GfL7novGWhQ/R5A+wH3OAQ/Shz+ZS5AOpzWktQIulo0=";
}

View File

@@ -10,11 +10,11 @@
manifestHash ? "",
# grab the *Blob from the manifest (trim the `sha256:` prefix).
# the manifest can be acquired by providing just the above parameters and building this package, then viewing the output
-modelBlob ? null,
+modelBlob ? "",
modelBlobHash ? "",
-paramsBlob ? null,
+paramsBlob ? "",
paramsBlobHash ? "",
-systemBlob ? null,
+systemBlob ? "",
systemBlobHash ? "",
}:
stdenv.mkDerivation {
@@ -24,17 +24,17 @@ stdenv.mkDerivation {
url = "https://registry.ollama.ai/v2/library/${modelName}/manifests/${variant}";
hash = manifestHash;
})
-] ++ lib.optionals (modelBlob != null) [
+] ++ lib.optionals (modelBlob != "") [
(fetchurl {
url = "https://registry.ollama.ai/v2/llama/${modelName}:${variant}/blobs/sha256-${modelBlob}";
hash = modelBlobHash;
})
-] ++ lib.optionals (paramsBlob != null) [
+] ++ lib.optionals (paramsBlob != "") [
(fetchurl {
url = "https://registry.ollama.ai/v2/llama/${modelName}:${variant}/blobs/sha256-${paramsBlob}";
hash = paramsBlobHash;
})
-] ++ lib.optionals (systemBlob != null) [
+] ++ lib.optionals (systemBlob != "") [
(fetchurl {
url = "https://registry.ollama.ai/v2/llama/${modelName}:${variant}/blobs/sha256-${systemBlob}";
hash = systemBlobHash;

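A note on the hunks above: the optional blob parameters now default to "" instead of null, and each `lib.optionals` guard compares against "" accordingly, so an unset blob simply means "don't fetch it". A minimal standalone sketch of the pattern (hypothetical file; the real mkOllamaModel handles the params and system blobs the same way):

# sketch: conditionally-extended srcs list, as used by mkOllamaModel
{ lib, fetchurl, modelName, variant,
  manifestHash ? "", modelBlob ? "", modelBlobHash ? "" }:
[
  # the manifest is always fetched
  (fetchurl {
    url = "https://registry.ollama.ai/v2/library/${modelName}/manifests/${variant}";
    hash = manifestHash;
  })
] ++ lib.optionals (modelBlob != "") [
  # appended only once the digest has been copied out of the manifest
  (fetchurl {
    url = "https://registry.ollama.ai/v2/llama/${modelName}:${variant}/blobs/sha256-${modelBlob}";
    hash = modelBlobHash;
  })
]

Per the comment in the file, bootstrapping a new model is a two-pass build: first supply only modelName, variant, and manifestHash, build, read the blob digests out of the fetched manifest (trimming the `sha256:` prefix), then rebuild with the *Blob and *BlobHash values filled in.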
View File

@@ -19,6 +19,7 @@ lib.recurseIntoAttrs (lib.makeScope newScope (self: with self; {
codestral-22b = callPackage ./codestral-22b.nix { };
deepseek-coder-7b = callPackage ./deepseek-coder-7b.nix { };
deepseek-coder-v2-16b = callPackage ./deepseek-coder-v2-16b.nix { };
+deepseek-coder-v2-16b-lite-instruct-q5_1 = callPackage ./deepseek-coder-v2-16b-lite-instruct-q5_1.nix { };
falcon2-11b = callPackage ./falcon2-11b.nix { };
gemma2-9b = callPackage ./gemma2-9b.nix { };
glm4-9b = callPackage ./glm4-9b.nix { };
@@ -36,6 +37,7 @@ lib.recurseIntoAttrs (lib.makeScope newScope (self: with self; {
qwen2_5-7b = callPackage ./qwen2_5-7b.nix { };
qwen2_5-14b = callPackage ./qwen2_5-14b.nix { };
qwen2_5-32b = callPackage ./qwen2_5-32b.nix { };
+qwen2_5-32b-instruct-q2_K = callPackage ./qwen2_5-32b-instruct-q2_K.nix { };
qwen2_5-coder-7b = callPackage ./qwen2_5-coder-7b.nix { };
solar-pro-22b = callPackage ./solar-pro-22b.nix { };
starcoder2-15b-instruct = callPackage ./starcoder2-15b-instruct.nix { };

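For orientation, this file assembles the model set with `lib.makeScope`, which is what brings `callPackage` (and with it `mkOllamaModel`) into scope for each per-model file. A trimmed sketch of its shape (only the attribute lines shown in the hunks are confirmed; the mkOllamaModel wiring and its path are assumptions):

# sketch: the makeScope/callPackage shape of the package-set file
{ lib, newScope }:
lib.recurseIntoAttrs (lib.makeScope newScope (self: with self; {
  # `with self` exposes the scope's callPackage, so each model file can
  # declare `{ mkOllamaModel }:` and have it injected automatically
  mkOllamaModel = callPackage ./mkOllamaModel.nix { };  # assumed path
  deepseek-coder-v2-16b-lite-instruct-q5_1 =
    callPackage ./deepseek-coder-v2-16b-lite-instruct-q5_1.nix { };
  qwen2_5-32b-instruct-q2_K = callPackage ./qwen2_5-32b-instruct-q2_K.nix { };
}))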
View File: qwen2_5-32b-instruct-q2_K.nix

@@ -0,0 +1,12 @@
# <https://ollama.com/library/qwen2.5>
# - 32b-instruct-q2_K is a variant of 32b that's 12 GB instead of 20 GB
# 14b-instruct is 9 GB, for comparison
{ mkOllamaModel }: mkOllamaModel {
modelName = "qwen2.5";
variant = "32b-instruct-q2_K";
manifestHash = "sha256-mYGFobYa4TEkAb+xBICAnmieSgYsTT+FbQNxGSWReNI=";
modelBlob = "ed7a653589dcc230070e02058681ad28ba202ddbd6b79e3dcbf23e436d5e8f00";
modelBlobHash = "sha256-7XplNYncwjAHDgIFhoGtKLogLdvWt549y/I+Q21ejwA=";
systemBlob = "66b9ea09bd5b7099cbb4fc820f31b575c0366fa439b08245566692c6784e281e";
systemBlobHash = "sha256-ZrnqCb1bcJnLtPyCDzG1dcA2b6Q5sIJFVmaSxnhOKB4=";
}
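Taken together, shipping a new quantized variant is three small edits: a per-model file like the two added here, a callPackage line in the package-set file, and an entry in the enabled-models list from the first hunk. A hypothetical first-pass skeleton for the per-model file (the variant name and empty hash are placeholders, not a real package):

# <https://ollama.com/library/qwen2.5>
# first-pass skeleton: build once with the blobs unset, inspect the fetched
# manifest for the layer digests, then fill in the *Blob/*BlobHash pairs
{ mkOllamaModel }: mkOllamaModel {
  modelName = "qwen2.5";
  variant = "14b-instruct-q8_0";  # placeholder variant
  manifestHash = "";              # empty hash: the first build's hash-mismatch
                                  # error reports the real value to paste in
}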