python311Packages.llama-index-core: fix `pythonImportsCheck` failures for dependent packages

This commit is contained in:
Malo Bourgon 2024-04-16 13:41:07 -07:00
parent 4231f20d15
commit 2d02c287d3
1 changed file with 27 additions and 0 deletions

View File

@ -6,6 +6,7 @@
deprecated,
dirtyjson,
fetchFromGitHub,
fetchzip,
fsspec,
llamaindex-py-client,
nest-asyncio,
@ -29,6 +30,18 @@
typing-inspect,
}:
let
# Fetch and unpack one NLTK data package. `subpath` is the archive's path below
# `packages/` in the nltk_data repository, without the `.zip` suffix; `hash` is the
# expected hash of the unpacked contents.
# NOTE(review): the URL follows the `gh-pages` branch rather than a fixed commit, so
# the hashes below presumably stop matching if upstream republishes an archive —
# consider pinning a revision.
fetchNltkPackage =
  subpath: hash:
  fetchzip {
    inherit hash;
    url = "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/${subpath}.zip";
  };
# NLTK data sets that get copied into the package's bundled nltk cache.
stopwords = fetchNltkPackage "corpora/stopwords" "sha256-tX1CMxSvFjr0nnLxbbycaX/IBnzHFxljMZceX5zElPY=";
punkt = fetchNltkPackage "tokenizers/punkt" "sha256-SKZu26K17qMUg7iCFZey0GTECUZ+sTTrF/pqeEgJCos=";
in
buildPythonPackage rec {
pname = "llama-index-core";
version = "0.10.29";
@ -45,6 +58,20 @@ buildPythonPackage rec {
sourceRoot = "${src.name}/${pname}";
# When `llama-index` is imported, it uses `nltk` to look for the following files and tries to
# download them if they aren't present.
# https://github.com/run-llama/llama_index/blob/6efa53cebd5c8ccf363582c932fffde44d61332e/llama-index-core/llama_index/core/utils.py#L59-L67
# Setting `NLTK_DATA` to a writable path can also solve this problem, but it needs to be done in
# every package that depends on `llama-index-core` for `pythonImportsCheck` not to fail, so
# copying the data into the package's own `_static/nltk_cache` directory is preferred here.
#
# NOTE: defining `patchPhase` replaces the default phase, so anything listed in `patches` is not
# applied by it. The `runHook` calls ensure `prePatch`/`postPatch` (attributes and registered
# hooks) still run, as expected of any overridden phase.
patchPhase = ''
  runHook prePatch

  # Stopword lists, placed where nltk expects `corpora/stopwords`.
  mkdir -p llama_index/core/_static/nltk_cache/corpora/stopwords/
  cp -r ${stopwords}/* llama_index/core/_static/nltk_cache/corpora/stopwords/

  # Punkt tokenizer models, placed where nltk expects `tokenizers/punkt`.
  mkdir -p llama_index/core/_static/nltk_cache/tokenizers/punkt/
  cp -r ${punkt}/* llama_index/core/_static/nltk_cache/tokenizers/punkt/

  runHook postPatch
'';
build-system = [ poetry-core ];
dependencies = [