diff --git a/pkgs/feeds/update.py b/pkgs/feeds/update.py
index 98e9eac98..1ac2114e4 100755
--- a/pkgs/feeds/update.py
+++ b/pkgs/feeds/update.py
@@ -5,11 +5,16 @@
 from feedsearch_crawler import search, sort_urls
 from feedsearch_crawler.crawler import coerce_url
 import json
+import logging
 import sys
 
 url, jsonPath = sys.argv[1:]
 
+logging.getLogger().setLevel(logging.DEBUG)
+logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
+logging.getLogger(__name__).debug("logging enabled")
+
 url = coerce_url(url, default_scheme="https")
-items = search(url)
+items = search(url, total_timeout=180, request_timeout=90, max_content_length=100*1024*1024)
 items = sort_urls(items)
 # print all results
diff --git a/pkgs/feedsearch-crawler/0001-response-chunk-size.patch b/pkgs/feedsearch-crawler/0001-response-chunk-size.patch
new file mode 100644
index 000000000..6a23df6ba
--- /dev/null
+++ b/pkgs/feedsearch-crawler/0001-response-chunk-size.patch
@@ -0,0 +1,13 @@
+diff --git a/feedsearch_crawler/crawler/request.py b/feedsearch_crawler/crawler/request.py
+index 70f3d5a..00668ad 100644
+--- a/feedsearch_crawler/crawler/request.py
++++ b/feedsearch_crawler/crawler/request.py
+@@ -277,7 +277,7 @@ class Request(Queueable):
+         """
+         body: bytes = b""
+         try:
+-            async for chunk in resp.content.iter_chunked(1024):
++            async for chunk in resp.content.iter_chunked(64 * 1024):
+                 if not chunk:
+                     break
+                 body += chunk
diff --git a/pkgs/feedsearch-crawler/default.nix b/pkgs/feedsearch-crawler/default.nix
index 3c42b4450..54fdf8e92 100644
--- a/pkgs/feedsearch-crawler/default.nix
+++ b/pkgs/feedsearch-crawler/default.nix
@@ -28,6 +28,12 @@ buildPythonPackage rec {
     hash = "sha256-pzvyeXzqdi8pRjk2+QjKhJfgtxbgVT6C08K9fhVFVmY=";
   };
 
+  patches = [
+    # fix for
+    # where large feeds would timeout in an unrecoverable way
+    ./0001-response-chunk-size.patch
+  ];
+
   nativeBuildInputs = [
     poetry-core
   ];
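
For context, the core of 0001-response-chunk-size.patch is the chunked read loop in feedsearch_crawler, which is built on aiohttp. Below is a minimal standalone sketch of the same pattern; fetch_body and the example URL are illustrative, not part of the repo.

# Sketch only: reproduces the patched read loop outside the crawler.
import asyncio

import aiohttp


async def fetch_body(url: str) -> bytes:
    body = b""
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            # 64 KiB chunks instead of 1 KiB mean far fewer loop iterations
            # (and fewer costly bytes concatenations) when the body is a
            # multi-megabyte feed -- the slow path the patch addresses.
            async for chunk in resp.content.iter_chunked(64 * 1024):
                if not chunk:
                    break
                body += chunk
    return body


if __name__ == "__main__":
    body = asyncio.run(fetch_body("https://example.com/feed.xml"))
    print(len(body))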