zimPackages: init

This includes Wikipedia snapshots, though currently only the smaller ones (simply because the larger ones take a long time to download).
This commit is contained in:
2024-10-09 23:20:13 +00:00
parent d12c10e203
commit f7a21243da
6 changed files with 197 additions and 0 deletions

View File

@@ -0,0 +1,70 @@
#!/usr/bin/env nix-shell
#!nix-shell -i python3 -p python3 -p python3.pkgs.beautifulsoup4 -p python3.pkgs.requests
import argparse
import requests
import os
import subprocess
import json
import re
from bs4 import BeautifulSoup
# Command-line interface of the version lister.
# --pname and --attr-path default to the UPDATE_NIX_PNAME and
# UPDATE_NIX_ATTR_PATH environment variables when those are set.
_env_pname = os.environ.get("UPDATE_NIX_PNAME")

parser = argparse.ArgumentParser(
    description="Get all available versions listed for a package in a site."
)
parser.add_argument(
    "--pname",
    help="name of the package",
    default=_env_pname,
    # only mandatory on the command line when the environment does not supply it
    required=_env_pname is None,
)
parser.add_argument(
    "--attr-path",
    help="attribute path of the package",
    default=os.environ.get("UPDATE_NIX_ATTR_PATH"),
)
parser.add_argument(
    "--url",
    help="url of the page that lists the package versions",
)
parser.add_argument(
    "--file",
    help="file name for writing debugging information",
)
parser.add_argument(
    "--extra-regex",
    help="additional regex to filter versions with",
)
def matching_versions(pname, hrefs, extra_regex=None):
    """Yield the version embedded in every href that names a `pname` file.

    An href matches when, after an optional leading directory part, it reads
    `<pname>_<version>[.tar].<ext>`. `pname` is matched literally: it is
    escaped before being placed in the pattern, so regex metacharacters in a
    package name cannot change what is matched.

    Parameters:
        pname: package name expected at the start of the file name.
        hrefs: iterable of href strings; `None` entries are skipped.
        extra_regex: optional regex the extracted version must fully match.

    Yields:
        Version strings, in the order their hrefs were supplied.
    """
    # compiled once: the pattern does not depend on the link being inspected
    pattern = re.compile(
        rf"(.*/)?{re.escape(pname)}_([\d.]+?(-[\d\w.-]+?)?)(\.tar)?(\.[^.]*)"
    )
    for href in hrefs:
        if href is None:
            continue
        match = pattern.fullmatch(href)
        if match is None:
            continue
        version = match.group(2)
        if (not extra_regex) or re.fullmatch(extra_regex, version):
            yield version


if __name__ == "__main__":
    args = parser.parse_args()

    pname = args.pname
    attr_path = args.attr_path or pname

    # without --url, list the directory that holds the package's first source url
    url = args.url or json.loads(
        subprocess.check_output(
            [
                "nix-instantiate",
                "--json",
                "--eval",
                "-E",
                f"with import ./. {{}}; dirOf (lib.head {attr_path}.src.urls)",
            ],
            text=True,
        )
    )

    # print a debugging message
    if args.file:
        with open(args.file, "a") as f:
            f.write(f"# Listing versions for {pname} from {url}\n")

    # bounded wait, so an unresponsive mirror cannot hang the updater forever
    page = requests.get(url, timeout=60)
    soup = BeautifulSoup(page.content, "html.parser")
    hrefs = (link.get("href", None) for link in soup.find_all("a"))

    for version in matching_versions(pname, hrefs, args.extra_regex):
        print(version)

View File

@@ -0,0 +1,46 @@
# heavily based on <repo:nixos/nixpkgs:pkgs/common-updater/scripts.nix>
# and <repo:nixos/nixpkgs:pkgs/common-updater/scripts/list-directory-versions>.
# main difference is that it does fuzzier matching.
#
# wraps `genericUpdater`, handing it a `versionLister` command line built from
# the `list-directory-versions` program packaged below.
{
  lib,
  genericUpdater,
  static-nix-shell,
}:
{
  pname ? null,
  version ? null,
  attrPath ? null,
  allowedVersions ? "",
  ignoredVersions ? "",
  rev-prefix ? "",
  odd-unstable ? false,
  patchlevel-unstable ? false,
  url ? null,
  extraRegex ? null,
}:
let
  list-directory-versions = static-nix-shell.mkPython3 {
    pname = "list-directory-versions";
    srcRoot = ./.;
    pkgs = [
      "python3.pkgs.beautifulsoup4"
      "python3.pkgs.requests"
    ];
  };

  listerExe = lib.getExe list-directory-versions;

  # each optional flag collapses to the empty string when its argument is null
  urlFlag = lib.optionalString (url != null) "--url=${lib.escapeShellArg url}";
  extraRegexFlag = lib.optionalString (
    extraRegex != null
  ) "--extra-regex=${lib.escapeShellArg extraRegex}";
in
genericUpdater {
  # everything except `url`/`extraRegex` is forwarded untouched
  inherit
    pname
    version
    attrPath
    allowedVersions
    ignoredVersions
    rev-prefix
    odd-unstable
    patchlevel-unstable
    ;

  versionLister = "${listerExe} ${urlFlag} ${extraRegexFlag}";
}

View File

@@ -0,0 +1,51 @@
# primary use case here is wikipedia.
# see list of wikipedia mirrors, which mostly include the .zim files:
# - <https://dumps.wikimedia.org/backup-index.html>
#
# builds a package that symlinks one downloaded .zim file into
# `$out/share/zim/<pname>.zim`.
{
  directoryListingUpdater2,
  fetchurl,
  stdenv,
}:
{
  pname,
  version,
  owner ? null, #< same meaning as in e.g. `fetchFromGitHub`
  hash ? "sha256-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=",
}@args:
let
  # location of the file below a mirror's zim root: "<owner>/<pname>_<version>.zim"
  ownerDir = if owner == null then "" else "${owner}/";
  zimFile = "${pname}_${version}.zim";

  # order matters: the first entry yields the first element of `src.urls`
  primaryMirror = "https://download.kiwix.org/zim";
  mirrors = [
    primaryMirror
    "https://dumps.wikimedia.org/other/kiwix/zim"
    "https://mirror.accum.se/mirror/wikimedia.org/other/kiwix/zim"
  ];

  # where the caller wrote its `version` argument
  versionPos = builtins.unsafeGetAttrPos "version" args;
in
stdenv.mkDerivation (finalAttrs: {
  inherit pname version;

  src = fetchurl {
    inherit hash;
    urls = map (mirror: "${mirror}/${ownerDir}${zimFile}") mirrors;
  };

  # the .zim is used as-is: nothing to unpack or compile
  dontUnpack = true;
  dontBuild = true;

  installPhase = ''
    runHook preInstall
    mkdir -p $out/share/zim
    ln -s ${finalAttrs.src} $out/share/zim/${pname}.zim
    runHook postInstall
  '';

  passthru = {
    updateScript = directoryListingUpdater2 {
      url = "${primaryMirror}/${ownerDir}";
    };
    # required so that directoryListingUpdater2 can know in which file the `version` variable can be updated in.
    meta.position = "${versionPos.file}:${toString versionPos.line}";
  };
})

View File

@@ -0,0 +1,18 @@
# .zim files are web dumps (html + images, etc) designed for read-only mirroring.
# so they package search indexes and such too.
# use together with kiwix.
#
# zim downloads:
# - https://mirror.accum.se/mirror/wikimedia.org/other/kiwix/zim
# - https://dumps.wikimedia.org/other/kiwix/zim
# - https://download.kiwix.org/zim
{
  lib,
  newScope,
}:
let
  # every member is instantiated through the scope's own `callPackage`
  members = self: {
    mkVersionedHttpZim = self.callPackage ./mkVersionedHttpZim.nix { };
    wikipedia_en_100 = self.callPackage ./wikipedia_en_100.nix { };
    wikipedia_en_all_mini = self.callPackage ./wikipedia_en_all_mini.nix { };
  };
in
lib.recurseIntoAttrs (lib.makeScope newScope members)

View File

@@ -0,0 +1,6 @@
# zim snapshot `wikipedia_en_100`, published under the `wikipedia` owner directory.
{
  mkVersionedHttpZim,
}:
mkVersionedHttpZim {
  pname = "wikipedia_en_100";
  owner = "wikipedia";
  version = "2024-06";
  hash = "sha256-Uafkv2QdVFOXjSUNHI6j6z77+89AL7J5aW93pPsk5wo=";
}

View File

@@ -0,0 +1,6 @@
# zim snapshot `wikipedia_en_all_mini`, published under the `wikipedia` owner directory.
{
  mkVersionedHttpZim,
}:
mkVersionedHttpZim {
  pname = "wikipedia_en_all_mini";
  owner = "wikipedia";
  version = "2024-04";
  hash = "sha256-svHyb2hOMvfMnkLLWbon3oZCCG6bv9RdtKbD36et/Ss=";
}