From 2d02c287d361f45d92da26d4ec739e49665eaed0 Mon Sep 17 00:00:00 2001 From: Malo Bourgon Date: Tue, 16 Apr 2024 13:41:07 -0700 Subject: [PATCH] python311Packages.llama-index-core: fix `pythonImportsCheck` failures for dependent packages --- .../llama-index-core/default.nix | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/pkgs/development/python-modules/llama-index-core/default.nix b/pkgs/development/python-modules/llama-index-core/default.nix index 3aae1bc641ff..937b335c0f3d 100644 --- a/pkgs/development/python-modules/llama-index-core/default.nix +++ b/pkgs/development/python-modules/llama-index-core/default.nix @@ -6,6 +6,7 @@ deprecated, dirtyjson, fetchFromGitHub, + fetchzip, fsspec, llamaindex-py-client, nest-asyncio, @@ -29,6 +30,18 @@ typing-inspect, }: +let + stopwords = fetchzip { + url = "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/stopwords.zip"; + hash = "sha256-tX1CMxSvFjr0nnLxbbycaX/IBnzHFxljMZceX5zElPY="; + }; + + punkt = fetchzip { + url = "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt.zip"; + hash = "sha256-SKZu26K17qMUg7iCFZey0GTECUZ+sTTrF/pqeEgJCos="; + }; +in + buildPythonPackage rec { pname = "llama-index-core"; version = "0.10.29"; @@ -45,6 +58,20 @@ buildPythonPackage rec { sourceRoot = "${src.name}/${pname}"; + # When `llama-index` is imported, it uses `nltk` to look for the following files and tries to + # download them if they aren't present. + # https://github.com/run-llama/llama_index/blob/6efa53cebd5c8ccf363582c932fffde44d61332e/llama-index-core/llama_index/core/utils.py#L59-L67 + # Setting `NLTK_DATA` to a writable path can also solve this problem, but it needs to be done in + # every package that depends on `llama-index-core` for `pythonImportsCheck` not to fail, so this + # solution seems more elegant. 
+  postPatch = ''
+    mkdir -p llama_index/core/_static/nltk_cache/corpora/stopwords/
+    cp -r ${stopwords}/* llama_index/core/_static/nltk_cache/corpora/stopwords/
+
+    mkdir -p llama_index/core/_static/nltk_cache/tokenizers/punkt/
+    cp -r ${punkt}/* llama_index/core/_static/nltk_cache/tokenizers/punkt/
+  ''; # a postPatch hook (not a patchPhase override) so the default patch phase and `patches` still run
+
   build-system = [ poetry-core ];
 
   dependencies = [