sane-tag-music: parse `album artist - album - track` notation
This commit is contained in:
@@ -78,15 +78,31 @@ import mutagen.oggvorbis
|
|||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
def romanize(a: str) -> str:
    """
    transliterate `a` (via `unidecode`) while losing as little information as possible
    """
    # substitutions which must happen before `unidecode` runs:
    #  - ampersand, like: PLS&TY; &I
    #  - かめりあ, which `unidecode` would otherwise set to kameria
    for rich, plain in (("&", "And"), ("かめりあ", "Camellia")):
        a = a.replace(rich, plain)

    a = unidecode(a)

    # these diacritic replacements might be unnecessary now that `unidecode` is used
    for accented, plain in (("é", "e"), ("ä", "a"), ("ö", "o"), ("ü", "u")):
        a = a.replace(accented, plain)

    return a
|
||||||
|
|
||||||
def clean_for_loose_compare(a: str) -> str:
|
def clean_for_loose_compare(a: str) -> str:
|
||||||
|
a = romanize(a)
|
||||||
a = a.strip().lower()
|
a = a.strip().lower()
|
||||||
if a.startswith("the "):
|
a = a.replace("the", "")
|
||||||
a = a[len("the "):]
|
a = a.replace("and", "")
|
||||||
|
|
||||||
# goal is to help merge path-extracted tags with embedded tags.
|
# goal is to help merge path-extracted tags with embedded tags.
|
||||||
# it's common for a tag to have some rich characters which can't be represented in a file.
|
# it's common for a tag to have some rich characters which can't be represented in a file.
|
||||||
# so just remove rich characters, but in a way which doesn't become useless when faced with primarily non-latin names
|
# so just remove rich characters, but in a way which doesn't become useless when faced with primarily non-latin names
|
||||||
omitable = '. &()[];:'
|
omitable = '.- &()[];:'
|
||||||
unomitable = 'abcdefghijklmnopqrstuvwxyz0123456789'
|
unomitable = 'abcdefghijklmnopqrstuvwxyz0123456789'
|
||||||
|
|
||||||
a = "".join(c for c in a if c not in omitable)
|
a = "".join(c for c in a if c not in omitable)
|
||||||
@@ -108,15 +124,7 @@ def loose_compare_lists(a: list[str], b: list[str]) -> bool:
|
|||||||
|
|
||||||
def clean_for_fs(a: str, single_field: bool=False) -> str:
|
def clean_for_fs(a: str, single_field: bool=False) -> str:
|
||||||
preserve = 'abcdefghijklmnopqrstuvwxyz0123456789._-'
|
preserve = 'abcdefghijklmnopqrstuvwxyz0123456789._-'
|
||||||
# ampersand, like: PLS&TY; &I
|
a = romanize(a)
|
||||||
a = a.replace("&", "And")
|
|
||||||
a = a.replace("かめりあ", "Camellia") # else `unidecode` sets it to kameria
|
|
||||||
a = unidecode(a)
|
|
||||||
# these diacritic replacements might be unnecessary now that i'm using unidecode?
|
|
||||||
a = a.replace("é", "e")
|
|
||||||
a = a.replace("ä", "a")
|
|
||||||
a = a.replace("ö", "o")
|
|
||||||
a = a.replace("ü", "u")
|
|
||||||
a = a.strip()
|
a = a.strip()
|
||||||
a = a.replace(" ", ".")
|
a = a.replace(" ", ".")
|
||||||
if single_field:
|
if single_field:
|
||||||
@@ -293,6 +301,7 @@ class Tags:
|
|||||||
- trackno - track (handled)
|
- trackno - track (handled)
|
||||||
- trackno - artist - track (handled)
|
- trackno - artist - track (handled)
|
||||||
- album - artist - title (for Various Artists/compilation albums) (handled)
|
- album - artist - title (for Various Artists/compilation albums) (handled)
|
||||||
|
- artist - album - title (handled)
|
||||||
- artist - album - trackno title (not handled)
|
- artist - album - trackno title (not handled)
|
||||||
track numbering:
|
track numbering:
|
||||||
- 01, 02, ...
|
- 01, 02, ...
|
||||||
@@ -323,21 +332,30 @@ class Tags:
|
|||||||
parse_title(track)
|
parse_title(track)
|
||||||
elif len(track_parts) == 2:
|
elif len(track_parts) == 2:
|
||||||
if tags.albumartist and loose_compare_str(track_parts[0], tags.albumartist[0]):
|
if tags.albumartist and loose_compare_str(track_parts[0], tags.albumartist[0]):
|
||||||
|
# `artist - track`
|
||||||
parse_title(track_parts[1])
|
parse_title(track_parts[1])
|
||||||
elif tags.album and loose_compare_str(track_parts[0], tags.album[0]):
|
elif tags.album and loose_compare_str(track_parts[0], tags.album[0]):
|
||||||
# less common, but sometimes `album - track`
|
# `album - track` (less common)
|
||||||
parse_title(track_parts[1])
|
parse_title(track_parts[1])
|
||||||
elif all(l in '0123456789-' for l in track_parts[0]):
|
elif all(l in '0123456789-' for l in track_parts[0]):
|
||||||
|
# `trackno - track`
|
||||||
parse_trackno(track_parts[0])
|
parse_trackno(track_parts[0])
|
||||||
parse_title(track_parts[1])
|
parse_title(track_parts[1])
|
||||||
elif len(track_parts) == 3:
|
elif len(track_parts) == 3:
|
||||||
if all(l in '0123456789-' for l in track_parts[0]):
|
if all(l in '0123456789-' for l in track_parts[0]):
|
||||||
|
# `trackno - track artist - track`
|
||||||
parse_trackno(track_parts[0])
|
parse_trackno(track_parts[0])
|
||||||
tags.artist = [track_parts[1]] # explicitly not album artist, but track artist
|
tags.artist = [track_parts[1]] # explicitly not album artist, but track artist
|
||||||
parse_title(track_parts[2])
|
parse_title(track_parts[2])
|
||||||
elif tags.album == [ track_parts[0] ]:
|
elif tags.album and loose_compare_str(track_parts[0], tags.album[0]):
|
||||||
|
# `album - track artist - track`
|
||||||
tags.artist = [track_parts[1]]
|
tags.artist = [track_parts[1]]
|
||||||
parse_title(track_parts[2])
|
parse_title(track_parts[2])
|
||||||
|
elif tags.albumartist and tags.album and \
|
||||||
|
loose_compare_str(track_parts[0], tags.albumartist[0]) and \
|
||||||
|
loose_compare_str(track_parts[1], tags.album[0]):
|
||||||
|
# `album artist - album - track`
|
||||||
|
parse_title(track_parts[2])
|
||||||
|
|
||||||
def parse_album(album: str) -> None:
|
def parse_album(album: str) -> None:
|
||||||
album_parts = [p.strip() for p in album.split(' - ')]
|
album_parts = [p.strip() for p in album.split(' - ')]
|
||||||
|
Reference in New Issue
Block a user