sane-tag-music: parse album artist - album - track notation

This commit is contained in:
2024-07-09 17:21:53 +00:00
parent ca6bb7518a
commit 2011065497

View File

@@ -78,15 +78,31 @@ import mutagen.oggvorbis
logger = logging.getLogger(__name__)
def romanize(a: str) -> str:
    """
    rewrite `a` into a romanized spelling, discarding as little info as possible.

    fixed substitutions are applied first, then `unidecode`, then a few
    diacritic substitutions.
    """
    # substitutions which have to happen *before* `unidecode` sees the text
    before_unidecode = (
        ("&", "And"),  # ampersand, like: PLS&TY; &I
        ("かめりあ", "Camellia"),  # else `unidecode` sets it to kameria
    )
    # diacritic substitutions; these might be unnecessary now that `unidecode` runs first
    after_unidecode = (
        ("é", "e"),
        ("ä", "a"),
        ("ö", "o"),
        ("ü", "u"),
    )

    for rich, plain in before_unidecode:
        a = a.replace(rich, plain)
    a = unidecode(a)
    for rich, plain in after_unidecode:
        a = a.replace(rich, plain)
    return a
def clean_for_loose_compare(a: str) -> str:
a = romanize(a)
a = a.strip().lower()
if a.startswith("the "):
a = a[len("the "):]
a = a.replace("the", "")
a = a.replace("and", "")
# goal is to help merge path-extracted tags with embedded tags.
# it's common for a tag to have some rich characters which can't be represented in a file.
# so just remove rich characters, but in a way which doesn't become useless when faced with primarily non-latin names
omitable = '. &()[];:'
omitable = '.- &()[];:'
unomitable = 'abcdefghijklmnopqrstuvwxyz0123456789'
a = "".join(c for c in a if c not in omitable)
@@ -108,15 +124,7 @@ def loose_compare_lists(a: list[str], b: list[str]) -> bool:
def clean_for_fs(a: str, single_field: bool=False) -> str:
preserve = 'abcdefghijklmnopqrstuvwxyz0123456789._-'
# ampersand, like: PLS&TY; &I
a = a.replace("&", "And")
a = a.replace("かめりあ", "Camellia") # else `unidecode` sets it to kameria
a = unidecode(a)
# these diacritic replacements might be unnecessary now that i'm using unidecode?
a = a.replace("é", "e")
a = a.replace("ä", "a")
a = a.replace("ö", "o")
a = a.replace("ü", "u")
a = romanize(a)
a = a.strip()
a = a.replace(" ", ".")
if single_field:
@@ -293,6 +301,7 @@ class Tags:
- trackno - track (handled)
- trackno - artist - track (handled)
- album - artist - title (for Various Artists/compilation albums) (handled)
- artist - album - title (handled)
- artist - album - trackno title (not handled)
track numbering:
- 01, 02, ...
@@ -323,21 +332,30 @@ class Tags:
parse_title(track)
elif len(track_parts) == 2:
if tags.albumartist and loose_compare_str(track_parts[0], tags.albumartist[0]):
# `artist - track`
parse_title(track_parts[1])
elif tags.album and loose_compare_str(track_parts[0], tags.album[0]):
# less common, but sometimes `album - track`
# `album - track` (less common)
parse_title(track_parts[1])
elif all(l in '0123456789-' for l in track_parts[0]):
# `trackno - track`
parse_trackno(track_parts[0])
parse_title(track_parts[1])
elif len(track_parts) == 3:
if all(l in '0123456789-' for l in track_parts[0]):
# `trackno - track artist - track`
parse_trackno(track_parts[0])
tags.artist = [track_parts[1]] # explicitly not album artist, but track artist
parse_title(track_parts[2])
elif tags.album == [ track_parts[0] ]:
elif tags.album and loose_compare_str(track_parts[0], tags.album[0]):
# `album - track artist - track`
tags.artist = [track_parts[1]]
parse_title(track_parts[2])
elif tags.albumartist and tags.album and \
loose_compare_str(track_parts[0], tags.albumartist[0]) and \
loose_compare_str(track_parts[1], tags.album[0]):
# `album artist - album - track`
parse_title(track_parts[2])
def parse_album(album: str) -> None:
album_parts = [p.strip() for p in album.split(' - ')]