doc: Add test for broken links in manpage-urls.json

This commit is contained in:
nicoo 2023-12-18 18:37:16 +00:00
parent 55520e0602
commit b1a96bbbf2
2 changed files with 129 additions and 0 deletions

View File

@ -149,4 +149,26 @@ in pkgs.stdenv.mkDerivation {
echo "doc manual $dest ${common.indexPath}" >> $out/nix-support/hydra-build-products
echo "doc manual $dest nixpkgs-manual.epub" >> $out/nix-support/hydra-build-products
'';
passthru.tests.manpage-urls = with pkgs; testers.invalidateFetcherByDrvHash
({ name ? "manual_check-manpage-urls"
, script
, urlsFile
}: runCommand name {
nativeBuildInputs = [
cacert
(python3.withPackages (p: with p; [
aiohttp
rich
structlog
]))
];
outputHash = "sha256-47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="; # Empty output
} ''
python3 ${script} ${urlsFile}
touch $out
'') {
script = ./tests/manpage-urls.py;
urlsFile = ./manpage-urls.json;
};
}

107
doc/tests/manpage-urls.py Executable file
View File

@ -0,0 +1,107 @@
#! /usr/bin/env nix-shell
#! nix-shell -i "python3 -I" -p "python3.withPackages(p: with p; [ aiohttp rich structlog ])"
from argparse import ArgumentParser
from collections import defaultdict
from enum import IntEnum
from http import HTTPStatus
from pathlib import Path
import asyncio, json, logging
import aiohttp, structlog
from structlog.contextvars import bound_contextvars as log_context
LogLevel = IntEnum('LogLevel', {
lvl: getattr(logging, lvl)
for lvl in ('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL')
})
LogLevel.__str__ = lambda self: self.name
EXPECTED_STATUS=frozenset((
HTTPStatus.OK, HTTPStatus.FOUND,
HTTPStatus.NOT_FOUND,
))
async def check(session, manpage: str, url: str) -> HTTPStatus:
with log_context(manpage=manpage, url=url):
logger.debug("Checking")
async with session.head(url) as resp:
st = HTTPStatus(resp.status)
match st:
case HTTPStatus.OK | HTTPStatus.FOUND:
logger.debug("OK!")
case HTTPStatus.NOT_FOUND:
logger.error("Broken link!")
case _ if st < 400:
logger.info("Unexpected code", status=st)
case _ if 400 <= st < 600:
logger.warn("Unexpected error", status=st)
return st
async def main(urls_path):
logger.info(f"Parsing {urls_path}")
with urls_path.open() as urls_file:
urls = json.load(urls_file)
count = defaultdict(lambda: 0)
logger.info(f"Checking URLs from {urls_path}")
async with aiohttp.ClientSession() as session:
for status in asyncio.as_completed([
check(session, manpage, url)
for manpage, url in urls.items()
]):
count[await status]+=1
ok = count[HTTPStatus.OK] + count[HTTPStatus.FOUND]
broken = count[HTTPStatus.NOT_FOUND]
unknown = sum(c for st, c in count.items() if st not in EXPECTED_STATUS)
logger.info(f"Done: {broken} broken links, "
f"{ok} correct links, and {unknown} unexpected status")
return count
def parse_args(args=None):
parser = ArgumentParser(
prog = 'check-manpage-urls',
description = 'Check the validity of the manpage URLs linked in the nixpkgs manual',
)
parser.add_argument(
'-l', '--log-level',
default = os.getenv('LOG_LEVEL', 'INFO'),
type = lambda s: LogLevel[s],
choices = list(LogLevel),
)
parser.add_argument(
'file',
type = Path,
nargs = '?',
)
return parser.parse_args(args)
if __name__ == "__main__":
import os, sys
args = parse_args()
structlog.configure(
wrapper_class=structlog.make_filtering_bound_logger(args.log_level),
)
logger = structlog.getLogger("check-manpage-urls.py")
urls_path = args.file
if urls_path is None:
REPO_ROOT = Path(__file__).parent.parent.parent.parent
logger.info(f"Assuming we are in a nixpkgs repo rooted at {REPO_ROOT}")
urls_path = REPO_ROOT / 'doc' / 'manpage-urls.json'
count = asyncio.run(main(urls_path))
sys.exit(0 if count[HTTPStatus.NOT_FOUND] == 0 else 1)