sane-bt-search: improve sort metrics

This commit is contained in:
2025-07-07 20:05:03 +00:00
parent d5711e7de7
commit 48bd6d304f

View File

@@ -20,6 +20,7 @@ returns select results and magnet links.
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
import argparse
import logging
import json
@@ -40,8 +41,13 @@ epoch = datetime(1970, 1, 1)
logger = logging.getLogger(__name__)
class SortMethod(Enum):
    """Ranking strategies understood by `sort_results`.

    Each member's value is the lowercase string form of its name, so a
    member can be recovered from text with `SortMethod("seeders")`.
    """

    # tracker tier first, then seeder count, then position inside the tier
    Balanced = "balanced"
    # seeder count first, tracker preference only breaks ties
    Seeders = "seeders"
    # tracker preference (tier, then position) first, seeder count breaks ties
    Tracker = "tracker"
class BadCliArgs(Exception):
def __init__(self, msg: str = None):
def __init__(self, msg: str | None = None) -> None:
helpstr = __doc__
if msg:
super().__init__(f"{msg}\n\n{helpstr}")
@@ -49,7 +55,7 @@ class BadCliArgs(Exception):
super().__init__(helpstr)
def try_parse_time(t: str):
def try_parse_time(t: str) -> datetime:
try:
return datetime.fromisoformat(t)
except ValueError: pass
@@ -65,23 +71,34 @@ def parse_time(t: str) -> datetime:
# preference, best to worst.
# each entry is one tier: a list of tracker names which are _roughly_ equal.
# every tier must be a list (never a bare string), so that lookups compare
# whole names instead of substrings.
TRACKER_RANKS = [
    ['BitMagnet (Local DHT)'],
    [
        'BakaBT',
        'SubsPlease',
        'Nyaa.si',
        'sukebei.nyaa.si',
    ],
    ['YTS'],
    ['MioBT'],
    ['Internet Archive'],
    ['The Pirate Bay'],
    # haven't sorted these
    [
        '1337x',
        # NOTE(review): possibly a typo for 'Bangumi Moe' -- confirm against
        # the name the indexer actually reports.
        'Bengumi Moe',
        'kickasstorrents.to',
        'Tokyo Toshokan',
        'Torlock',
    ],
]


def tracker_rank(tracker: str) -> tuple[int, int]:
    """Return the rank of `tracker` as a `(major, minor)` tuple; lower is better.

    `major` is the index of the tier in `TRACKER_RANKS` that lists the
    tracker, and `minor` is the tracker's position inside that tier.
    Trackers sharing a `major` rank are _roughly_ equal.

    Names are compared case-insensitively, so both 'Nyaa.si' and 'nyaa.si'
    resolve to the same rank.

    A tracker that is not listed is logged as a warning and ranked after
    every known tier, i.e. `(len(TRACKER_RANKS), 0)`.
    """
    wanted = tracker.casefold()
    for major, tier in enumerate(TRACKER_RANKS):
        for minor, name in enumerate(tier):
            if name.casefold() == wanted:
                return major, minor
    logger.warning("unknown tracker: %r", tracker)
    return len(TRACKER_RANKS), 0
# category labels, grouped into sets for membership tests.
# NOTE(review): presumably DROP_CATS are labels too generic to be worth
# keeping and BOOK_CATS are the ones selected by `--book` -- the code that
# consumes them is not visible here, confirm against it.
DROP_CATS = {
    "dvd",
    "hd",
    "misc",
    "other",
    "sd",
    "uhd",
}
BOOK_CATS = {
    "audio",
    "books",
    "ebook",
}
@@ -249,12 +266,17 @@ class Client:
return sorted(torrents, reverse=True)
def sort_results(torrents: list[Torrent], by: SortMethod | str) -> list[Torrent]:
    """Return a new list of `torrents`, best match first, ranked per `by`.

    `by` is a `SortMethod`, or the string value of one ("balanced",
    "seeders", "tracker"):

    - Balanced: tracker tier, then seeders, then position inside the tier.
    - Seeders:  seeders, then tracker tier, then position inside the tier.
    - Tracker:  tracker tier, then position inside the tier, then seeders.

    More seeders rank earlier; a lower `tracker_rank` ranks earlier.
    The input list is not modified.

    Raises `ValueError` if `by` does not name a `SortMethod`.
    """
    # normalizes a plain string to the enum member; a member passes through unchanged.
    method = SortMethod(by)

    def key(t: Torrent) -> tuple[int, int, int, Torrent]:
        # every component is "smaller is better", so a plain ascending sort
        # yields best-first order.
        rank_seeders = -t.seeders
        rank_tracker_major, rank_tracker_minor = tracker_rank(t.tracker)
        # NOTE(review): exact ties fall through to Torrent's own ordering,
        # ascending. if Torrent orders "better" as "greater", this places the
        # weaker of two tied torrents first -- confirm the intended direction.
        # TODO: `Balanced` should consider `size` and `pub_date`
        return {
            SortMethod.Balanced: (rank_tracker_major, rank_seeders, rank_tracker_minor, t),
            SortMethod.Seeders: (rank_seeders, rank_tracker_major, rank_tracker_minor, t),
            SortMethod.Tracker: (rank_tracker_major, rank_tracker_minor, rank_seeders, t),
        }[method]

    return sorted(torrents, key=key)
def format_results(all_results: list[Torrent], filtered_results: list[Torrent], json: bool):
if json:
@@ -274,7 +296,7 @@ def main(args: list[str]):
parser = argparse.ArgumentParser(description='search torrent trackers')
parser.add_argument('--full', action='store_true', help='show all results')
parser.add_argument('--top', help=f'how many results to show (default: {DEFAULT_RESULT_COUNT})')
parser.add_argument('--sort-by', default='seeders', help='how to rank matches (seeders, tracker)')
parser.add_argument('--sort-by', default=SortMethod.Balanced, type=SortMethod, help='how to rank matches (seeders, tracker)')
parser.add_argument('--json', action='store_true', help='output results in json')
parser.add_argument('--verbose', action='store_true')
parser.add_argument('--book', action='store_true', help='show only book (ebook or audiobook) results')