sane-tag-music: parse album artist - album - track notation

2024-07-09 17:21:53 +00:00
parent ca6bb7518a
commit 2011065497
1 changed files with 32 additions and 14 deletions
--- a/pkgs/additional/sane-scripts/src/sane-tag-music
+++ b/pkgs/additional/sane-scripts/src/sane-tag-music
@@ -78,15 +78,31 @@ import mutagen.oggvorbis
 logger = logging.getLogger(__name__)
 def romanize(a: str) -> str:
    """
    transform `a` in a way which loses only a minimal amount of info.

    steps, in order:
    - replace every `&` with the literal text `And`
    - replace `かめりあ` with `Camellia` (hand-written override)
    - pass the result through `unidecode`
    - replace a handful of accented latin letters with their bare form

    returns the transformed string; `a` itself is a `str` and so is never mutated.
    """
    # ampersand, like: PLS&TY; &I
    a = a.replace("&", "And")
    # this override has to run *before* `unidecode`, which would otherwise produce its own spelling
    a = a.replace("かめりあ", "Camellia")  # else `unidecode` sets it to kameria
    a = unidecode(a)
    # these diacritic replacements might be unnecessary now that i'm using unidecode?
    # NOTE(review): presumably `unidecode` has already transliterated these characters by this
    # point, which would make the four replacements below no-ops -- confirm before removing them.
    a = a.replace("é", "e")
    a = a.replace("ä", "a")
    a = a.replace("ö", "o")
    a = a.replace("ü", "u")
    return a
 def clean_for_loose_compare(a: str) -> str:
    a = romanize(a)
    a = a.strip().lower()
-    if a.startswith("the "):
+    a = a.replace("the", "")
-        a = a[len("the "):]
+    a = a.replace("and", "")
    # goal is to help merge path-extracted tags with embedded tags.
    # it's common for a tag to have some rich characters which can't be represented in a file.
    # so just remove rich characters, but in a way which doesn't become useless when faced with primarily non-latin names
-    omitable = '. &()[];:'
+    omitable = '.- &()[];:'
    unomitable = 'abcdefghijklmnopqrstuvwxyz0123456789'
    a = "".join(c for c in a if c not in omitable)
@@ -108,15 +124,7 @@ def loose_compare_lists(a: list[str], b: list[str]) -> bool:
 def clean_for_fs(a: str, single_field: bool=False) -> str:
    preserve = 'abcdefghijklmnopqrstuvwxyz0123456789._-'
-    # ampersand, like: PLS&TY; &I
+    a = romanize(a)
    a = a.replace("&", "And")
    a = a.replace("かめりあ", "Camellia")  # else `unidecode` sets it to kameria
    a = unidecode(a)
    # these diacritic replacements might be unnecessary now that i'm using unidecode?
    a = a.replace("é", "e")
    a = a.replace("ä", "a")
    a = a.replace("ö", "o")
    a = a.replace("ü", "u")
    a = a.strip()
    a = a.replace(" ", ".")
    if single_field:
@@ -293,6 +301,7 @@ class Tags:
        - trackno - track (handled)
        - trackno - artist - track (handled)
        - album - artist - title (for Various Artists/compilation albums) (handled)
        - artist - album - title (handled)
        - artist - album - trackno title (not handled)
        track numbering:
        - 01, 02, ...
@@ -323,21 +332,30 @@ class Tags:
                parse_title(track)
            elif len(track_parts) == 2:
                if tags.albumartist and loose_compare_str(track_parts[0], tags.albumartist[0]):
                    # `artist - track`
                    parse_title(track_parts[1])
                elif tags.album and loose_compare_str(track_parts[0], tags.album[0]):
-                    # less common, but sometimes `album - track`
+                    # `album - track` (less common)
                    parse_title(track_parts[1])
                elif all(l in '0123456789-' for l in track_parts[0]):
                    # `trackno - track`
                    parse_trackno(track_parts[0])
                    parse_title(track_parts[1])
            elif len(track_parts) == 3:
                if all(l in '0123456789-' for l in track_parts[0]):
                    # `trackno - track artist - track`
                    parse_trackno(track_parts[0])
                    tags.artist = [track_parts[1]]  # explicitly not album artist, but track artist
                    parse_title(track_parts[2])
-                elif tags.album == [ track_parts[0] ]:
+                elif tags.album and loose_compare_str(track_parts[0], tags.album[0]):
                    # `album - track artist - track`
                    tags.artist = [track_parts[1]]
                    parse_title(track_parts[2])
                elif tags.albumartist and tags.album and \
                        loose_compare_str(track_parts[0], tags.albumartist[0]) and \
                        loose_compare_str(track_parts[1], tags.album[0]):
                    # `album artist - album - track`
                    parse_title(track_parts[2])
        def parse_album(album: str) -> None:
            album_parts = [p.strip() for p in album.split(' - ')]