python311Packages.tokenizers: 0.15.0 -> 0.19.1

Diff: https://github.com/huggingface/tokenizers/compare/v0.15.0...v0.19.1
This commit is contained in:
Gaetan Lepage 2024-04-20 00:07:38 +02:00
parent 92d295f588
commit 5e766a1a10
2 changed files with 243 additions and 343 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -63,16 +63,16 @@ let
in in
buildPythonPackage rec { buildPythonPackage rec {
pname = "tokenizers"; pname = "tokenizers";
version = "0.15.0"; version = "0.19.1";
format = "pyproject"; pyproject = true;
disabled = pythonOlder "3.7"; disabled = pythonOlder "3.7";
src = fetchFromGitHub { src = fetchFromGitHub {
owner = "huggingface"; owner = "huggingface";
repo = pname; repo = "tokenizers";
rev = "v${version}"; rev = "refs/tags/v${version}";
hash = "sha256-+yfX12eKtgZV1OQvPOlMVTONbpFuigHcl4SjoCIZkSk="; hash = "sha256-sKEAt46cdme821tzz9WSKnQb3hPmFJ4zvHgBNRxjEuk=";
}; };
cargoDeps = rustPlatform.importCargoLock { cargoDeps = rustPlatform.importCargoLock {
@@ -97,7 +97,13 @@ buildPythonPackage rec {
Security Security
]; ];
propagatedBuildInputs = [ # Cargo.lock is outdated
# TODO: remove at next release
preConfigure = ''
cargo update --offline
'';
dependencies = [
numpy numpy
]; ];
@@ -123,6 +129,8 @@ buildPythonPackage rec {
disabledTests = [ disabledTests = [
# Downloads data using the datasets module # Downloads data using the datasets module
"test_encode_special_tokens"
"test_splitting"
"TestTrainFromIterators" "TestTrainFromIterators"
# Those tests require more data # Those tests require more data
"test_from_pretrained" "test_from_pretrained"