2023-01-14 10:23:50 +00:00
|
|
|
#!/usr/bin/env nix-shell
|
|
|
|
#!nix-shell -i python3 -p "python3.withPackages (ps: [ ps.natsort ps.requests ])"
|
2023-03-26 08:34:27 +00:00
|
|
|
# vim: set filetype=python :
|
2022-12-26 09:05:26 +00:00
|
|
|
"""
|
2023-06-15 10:25:59 +00:00
|
|
|
usage: sane-bt-search [options] <query_string>
|
2022-12-26 09:05:26 +00:00
|
|
|
|
|
|
|
searches Jackett for torrent files matching the title.
|
2023-06-15 10:25:59 +00:00
|
|
|
returns select results and magnet links.
|
|
|
|
|
|
|
|
options:
|
|
|
|
--full display all results
|
|
|
|
--help show this help message and exit
|
2023-07-10 21:53:55 +00:00
|
|
|
--manga show only manga results
|
2023-06-15 10:25:59 +00:00
|
|
|
--json output one json document instead of a human-readable table
|
2023-07-05 23:18:13 +00:00
|
|
|
--top=<n> show the <n> top rated torrents (default: 5)
|
2023-06-15 10:25:59 +00:00
|
|
|
--verbose show more information, useful for debugging/development
|
2022-12-26 09:05:26 +00:00
|
|
|
"""
|
|
|
|
|
2023-06-19 21:01:52 +00:00
|
|
|
# about Jackett
|
|
|
|
# - source: <https://github.com/Jackett/Jackett>
|
|
|
|
# - can be queried via APIs:
|
|
|
|
# - Torznab: <https://torznab.github.io/spec-1.3-draft/index.html>
|
|
|
|
# - TorrentPotato: <https://github.com/RuudBurger/CouchPotatoServer/wiki/Couchpotato-torrent-provider>
|
|
|
|
# - its own JSON-based API
|
|
|
|
|
2022-12-26 09:05:26 +00:00
|
|
|
from dataclasses import dataclass
|
|
|
|
from datetime import datetime
|
2023-06-08 01:32:19 +00:00
|
|
|
import logging
|
2023-04-29 08:59:06 +00:00
|
|
|
import json
|
2022-12-26 09:05:26 +00:00
|
|
|
import natsort
|
|
|
|
import requests
|
|
|
|
import sys
|
|
|
|
import time
|
|
|
|
|
|
|
|
# Jackett instance this tool queries.
SERVICE = "https://jackett.uninsane.org"

# REST paths (relative to SERVICE) for each supported API method.
ENDPOINTS = {
    "results": "api/v2.0/indexers/all/results",
    # "results_torznab": "api/v2.0/indexers/all/results/torznab",
}

# fallback timestamp for results whose publish date can't be parsed.
epoch = datetime(1970, 1, 1)

logger = logging.getLogger(__name__)
|
|
|
|
|
2023-06-15 10:25:59 +00:00
|
|
|
class BadCliArgs(Exception):
    """raised on malformed CLI usage; the message embeds the usage text."""

    def __init__(self, msg: str = None):
        # `__doc__` here is the *module* docstring (the usage text above),
        # not the class docstring: method scope resolves it as a global.
        super().__init__(f"{msg}\n\n{__doc__}" if msg else __doc__)
|
|
|
|
|
|
|
|
|
2023-05-04 00:36:17 +00:00
|
|
|
def try_parse_time(t: str):
    """
    best-effort ISO-8601 parse of `t`; returns a datetime, or None on failure.

    some trackers encode timestamps with e.g. too many digits in the
    milliseconds field, so on failure we chomp one trailing character and
    retry, stopping once the string is down to bare 'YYYY-MM-DD' length.
    """
    try:
        return datetime.fromisoformat(t)
    except ValueError:
        if len(t) > len('YYYY-MM-DD'):
            return try_parse_time(t[:-1])
        return None
|
|
|
|
|
|
|
|
def parse_time(t: str) -> datetime:
    """
    parse `t` as a timestamp, falling back to the Unix epoch when unparseable.

    the result is localized via astimezone() so all Torrent.pub_date values
    are timezone-aware and mutually comparable.
    """
    # BUGFIX: the old form `try_parse_time(t).astimezone() or epoch` raised
    # AttributeError on a failed parse (None.astimezone()) — the `or epoch`
    # fallback was unreachable. Apply the fallback *before* astimezone().
    parsed = try_parse_time(t)
    if parsed is None:
        parsed = epoch
    return parsed.astimezone()
|
|
|
|
|
|
|
|
|
2023-07-07 07:08:17 +00:00
|
|
|
# category labels too generic to be useful for filtering.
DROP_CATS = { "misc", "other", "dvd", "sd", "hd" }
# category labels which indicate manga/book content.
MANGA_CATS = { "books", "comics", "ebook" }
# every category label this tool understands.
KNOWN_CATS = frozenset(MANGA_CATS | { "anime", "audio", "movies", "tv", "xxx" })

def clean_cat(c: str) -> str | None:
    """map a category label to itself, or to None when it's too generic to keep."""
    return None if c in DROP_CATS else c


def is_cat(cats: list[str], wanted_cats: list[str], default: bool = False) -> bool:
    """
    return True if any of the `cats` is in `wanted_cats`.
    if none of the `cats` is recognized at all, assume `default`.
    """
    recognized = [c for c in cats if c in KNOWN_CATS]
    if not recognized:
        return default
    return any(c in wanted_cats for c in cats)
|
|
|
|
|
2023-03-26 08:34:52 +00:00
|
|
|
@dataclass(eq=True, order=True, unsafe_hash=True)
|
2022-12-26 09:05:26 +00:00
|
|
|
class Torrent:
|
|
|
|
seeders: int
|
|
|
|
pub_date: datetime
|
2023-01-20 02:10:07 +00:00
|
|
|
size: int
|
|
|
|
tracker: str
|
2022-12-26 09:05:26 +00:00
|
|
|
title: str
|
2023-07-07 07:12:48 +00:00
|
|
|
magnet: str | None
|
|
|
|
http_dl_uri: str | None # probably a .torrent file but it COULD be a referral to a magnet:// URI
|
|
|
|
tracker_uri: str | None
|
2023-07-07 07:08:17 +00:00
|
|
|
categories: frozenset[str] # human-friendly list of categories, lowercase. e.g. ["Books", "Anime"]
|
2022-12-26 09:05:26 +00:00
|
|
|
|
|
|
|
def __str__(self) -> str:
|
2023-07-07 07:08:17 +00:00
|
|
|
cats = "/".join(self.categories) if self.categories else "?"
|
2023-06-19 21:01:52 +00:00
|
|
|
rows = []
|
2023-07-07 07:08:17 +00:00
|
|
|
rows.append(f"{self.seeders}[S]\t{cats}\t{self.tracker}\t{self.pub_date}\t{self.mib}M\t{self.title}")
|
2023-06-19 21:01:52 +00:00
|
|
|
if self.tracker_uri:
|
|
|
|
rows.append(f"\t{self.tracker_uri}")
|
|
|
|
rows.append(f"\t{self.dl_uri}")
|
|
|
|
return "\n".join(rows)
|
2023-06-08 01:32:19 +00:00
|
|
|
|
|
|
|
@property
|
|
|
|
def dl_uri(self) -> str:
|
|
|
|
return self.magnet or self.http_dl_uri
|
2023-01-20 02:10:07 +00:00
|
|
|
|
|
|
|
@property
|
|
|
|
def mib(self) -> int:
|
|
|
|
return int(round(self.size / 1024 / 1024))
|
2022-12-26 09:05:26 +00:00
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
def from_dict(d: dict) -> 'Torrent':
|
2023-06-08 01:32:19 +00:00
|
|
|
logger.debug(f"Torrent.from_dict: fields: { ' '.join(d.keys()) }")
|
|
|
|
for k, v in d.items():
|
2023-07-07 07:08:17 +00:00
|
|
|
if k not in ("CategoryDesc", "Seeders", "PublishDate", "Size", "Tracker", "Title", "MagnetUri", "Guid", "Link", "Details") and \
|
2023-06-08 01:32:19 +00:00
|
|
|
v != None and v != "" and v != [] and v != {}:
|
|
|
|
logger.debug(f" {k} = {v}")
|
|
|
|
|
2022-12-26 09:05:26 +00:00
|
|
|
seeders = d.get("Seeders")
|
|
|
|
pub_date = d.get("PublishDate")
|
2023-01-20 02:10:07 +00:00
|
|
|
size = d.get("Size")
|
|
|
|
tracker = d.get("Tracker")
|
2022-12-26 09:05:26 +00:00
|
|
|
title = d.get("Title")
|
2023-06-08 01:32:19 +00:00
|
|
|
magnet = d.get("MagnetUri") or d.get("Guid")
|
|
|
|
http_dl_uri = d.get("Link")
|
2023-06-19 21:01:52 +00:00
|
|
|
tracker_uri = d.get("Details")
|
2023-07-07 07:08:17 +00:00
|
|
|
categories = d.get("CategoryDesc", "").replace("/", ",").split(",")
|
|
|
|
categories = (c.strip().lower() for c in categories)
|
|
|
|
categories = frozenset(clean_cat(c) for c in categories if clean_cat(c))
|
2023-06-08 01:32:19 +00:00
|
|
|
|
|
|
|
if magnet and not magnet.startswith("magnet:"):
|
|
|
|
logger.info(f"invalid magnet: {magnet}")
|
|
|
|
magnet = None
|
|
|
|
|
|
|
|
if seeders is not None and pub_date is not None and title is not None and (magnet is not None or http_dl_uri is not None):
|
2023-05-04 00:36:17 +00:00
|
|
|
pub_date = parse_time(pub_date)
|
2023-07-07 07:08:17 +00:00
|
|
|
return Torrent(seeders, pub_date, size, tracker, title, magnet, http_dl_uri, tracker_uri, categories=categories)
|
2022-12-26 09:05:26 +00:00
|
|
|
|
2023-04-29 08:59:06 +00:00
|
|
|
def to_dict(self) -> dict:
|
2023-06-19 21:01:52 +00:00
|
|
|
# N.B.: not all fields: needs to be kept in sync with consumers like mx-sanebot
|
2023-04-29 08:59:06 +00:00
|
|
|
return dict(
|
|
|
|
seeders=self.seeders,
|
|
|
|
pub_date=self.pub_date.strftime("%Y-%m-%d"),
|
|
|
|
size=self.size,
|
|
|
|
tracker=self.tracker,
|
|
|
|
title=self.title,
|
|
|
|
magnet=self.magnet,
|
|
|
|
)
|
|
|
|
|
2023-07-07 07:08:17 +00:00
|
|
|
def is_manga(self, default: bool = False) -> bool:
|
|
|
|
return is_cat(self.categories, MANGA_CATS, default)
|
|
|
|
|
2022-12-26 09:05:26 +00:00
|
|
|
class Client:
    """thin client for the Jackett aggregate-search JSON API."""

    def __init__(self):
        # use a context manager so the secret's file handle is closed
        # promptly instead of leaking until garbage collection.
        with open("/run/secrets/jackett_apikey") as f:
            self.apikey = f.read().strip()

    def api_call(self, method: str, params: dict) -> dict:
        """GET the endpoint registered for `method` and return the decoded JSON body."""
        endpoint = ENDPOINTS[method]
        url = f"{SERVICE}/{endpoint}"
        params = params.copy()  # don't mutate the caller's dict
        # `_` is a unique-per-call value, presumably a cache buster — TODO confirm
        params.update(apikey=self.apikey, _=str(int(time.time())))
        resp = requests.get(url, params=params)
        return resp.json()

    def query(self, q: str) -> list[Torrent]:
        """search all indexers for `q`; return deduped results, best first."""
        # a set dedupes identical results reported by multiple indexers
        # (Torrent is hashable via unsafe_hash).
        torrents = set()
        api_res = self.api_call("results", dict(Query=q))
        for r in api_res["Results"]:
            t = Torrent.from_dict(r)
            if t is not None:
                torrents.add(t)

        # Torrent's dataclass ordering ranks by seeders, then pub_date
        return sorted(torrents, reverse=True)
|
|
|
|
|
2023-07-07 07:08:17 +00:00
|
|
|
|
|
|
|
def filter_results(results: list[Torrent], full: bool, top: int, manga: bool) -> list[Torrent]:
    """
    narrow the complete query results according to the CLI options:
    keep only manga when `manga`, and truncate to the `top` entries
    unless `full` was requested.
    """
    filtered = results
    if manga:
        filtered = [t for t in filtered if t.is_manga(default=True)]
    return filtered if full else filtered[:top]
|
|
|
|
|
2023-07-07 07:12:48 +00:00
|
|
|
def parse_args(args: list[str]) -> dict:
    """
    parse CLI args (excluding argv[0]) into an options dict.

    `--flag` stores True; `--name=value` stores the string value; every
    positional arg is appended to the space-joined `query` entry.
    unrecognized `--options` are stored as-is so the caller can reject them.
    consumes `args` destructively.
    """
    options = dict(
        full=False,
        help=False,
        json=False,
        query="",
        top="5",
        verbose=False,
        manga=False,
    )
    while args:
        arg = args[0]
        del args[0]
        if arg.startswith('--'):
            opt = arg[2:]
            if "=" in opt:
                # BUGFIX: split only on the first '=' so option values may
                # themselves contain '=' (plain split() raised ValueError).
                name, val = opt.split('=', 1)
            else:
                name, val = opt, True
            options[name] = val
        else:
            options["query"] = options["query"] + " " + arg if options["query"] else arg

    return options
|
|
|
|
|
2023-07-07 07:12:48 +00:00
|
|
|
def main(args: list[str]):
    """CLI entry point: parse options, query Jackett, print or json-dump results."""
    logging.basicConfig()
    options = parse_args(args)
    full = options.pop("full")
    show_help = options.pop("help")
    # BUGFIX: this local was previously named `json`, shadowing the json
    # module and making `json.dumps(...)` below crash with AttributeError
    # whenever --json was passed.
    as_json = options.pop("json")
    query = options.pop("query")
    top = int(options.pop("top"))
    verbose = options.pop("verbose")
    manga = options.pop("manga")

    # anything left over is an option we don't recognize
    if options:
        raise BadCliArgs(f"unexpected options: {options}")
    if show_help:
        raise BadCliArgs()

    if verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    client = Client()
    results = client.query(query)
    num_results = len(results)

    results = filter_results(results, full, top, manga)

    if as_json:
        dumpable = [t.to_dict() for t in results]
        print(json.dumps(dumpable))
    else:
        print(f"found {num_results} result(s)")
        for r in results:
            print(r)
|
2023-01-20 02:17:59 +00:00
|
|
|
|
|
|
|
# script entry point: forward CLI arguments (without the program name) to main()
if __name__ == "__main__":
    main(sys.argv[1:])
|