sane-tag-music: handle more character encoding edgecases
This commit is contained in:
parent
2c66d8cad0
commit
7f08ad01db
|
@ -14,6 +14,34 @@ import mutagen.easyid3
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def clean_for_loose_compare(a: str) -> str:
    """
    Reduce a tag value to a simplified form used for approximate equality checks.

    The value is stripped, lowercased, and relieved of a leading "the ".
    Common punctuation/space characters are then dropped, and finally every
    character outside a-z / 0-9 is dropped too -- unless doing so would discard
    more than half of the remaining characters, in which case the
    punctuation-free text is returned as-is.
    """
    article = "the "
    # the point is to let path-extracted tags merge with embedded tags:
    # embedded tags often carry rich characters that a filename can't hold,
    # so those get dropped -- but not so eagerly that a mostly non-latin
    # name collapses into nothing.
    droppable = '. &()[];:'
    plain = 'abcdefghijklmnopqrstuvwxyz0123456789'

    text = a.strip().lower()
    if text.startswith(article):
        text = text[len(article):]

    text = "".join(ch for ch in text if ch not in droppable)
    plain_only = "".join(ch for ch in text if ch in plain)

    if len(plain_only) < 0.5 * len(text):
        # a *suspicious* share of the text was removed, probably in error:
        # likely a non-english album/artist/track, so keep the richer form.
        return text
    return plain_only
|
||||
|
||||
def loose_compare_str(a: str, b: str) -> bool:
    """ True when `a` and `b` are equal after each is passed through clean_for_loose_compare. """
    left = clean_for_loose_compare(a)
    right = clean_for_loose_compare(b)
    return left == right
|
||||
|
||||
def loose_compare_lists(a: list[str], b: list[str]) -> bool:
    """
    True when `a` and `b` hold the same values after each item is passed
    through clean_for_loose_compare; the order of the items is ignored.
    """
    left = [clean_for_loose_compare(item) for item in a]
    left.sort()
    right = [clean_for_loose_compare(item) for item in b]
    right.sort()
    return left == right
|
||||
|
||||
@dataclass
|
||||
class Tags:
|
||||
# format matches mutagen's
|
||||
|
@ -68,6 +96,22 @@ class Tags:
|
|||
tracknumber=tracknumber,
|
||||
)
|
||||
|
||||
def trim_fields(self) -> None:
    """
    strip leading/trailing whitespace from title, artist, albumartist and album.
    only fields holding exactly one value are touched; empty or multi-valued fields are left alone.
    """
    for field_name in ("title", "artist", "albumartist", "album"):
        values = getattr(self, field_name)
        if len(values) == 1:
            setattr(self, field_name, [ values[0].strip() ])
|
||||
|
||||
def expand_shorthands(self) -> None:
    """ replace an artist / albumartist that is exactly ["V.A."] with ["Various Artists"] """
    shorthand = "V.A."
    for field_name in ("artist", "albumartist"):
        if getattr(self, field_name) == [shorthand]:
            setattr(self, field_name, ["Various Artists"])
|
||||
|
||||
def promote_albumartist(self) -> None:
|
||||
"""
|
||||
1. replace shorthands like "V.A." with "Various Artists".
|
||||
|
@ -75,23 +119,14 @@ class Tags:
|
|||
3. if the artist and album artist are nearly identical, try to merge them.
|
||||
"""
|
||||
|
||||
if self.artist == ["V.A."]:
|
||||
self.artist = ["Various Artists"]
|
||||
if self.albumartist == ["V.A."]:
|
||||
self.albumartist = ["Various Artists"]
|
||||
|
||||
unomitable = 'abcdefghijklmnopqrstuvwxyz0123456789'
|
||||
if len(self.artist) == len(self.albumartist) == 1:
|
||||
filtered_artist = [i for i in self.artist[0] if i.lower() in unomitable]
|
||||
filtered_albumartist = [i for i in self.albumartist[0] if i.lower() in unomitable]
|
||||
if filtered_artist == filtered_albumartist:
|
||||
# artist & album artist are nearly identical:
|
||||
# probably guessed one of them from filename, which was lacking certain symbols of the actual artist.
|
||||
# recover whichever of these fields had the fewer characters removed (i.e. is longest)
|
||||
if len(self.artist[0]) > len(self.albumartist[0]):
|
||||
self.artist = self.albumartist = self.artist
|
||||
else:
|
||||
self.artist = self.albumartist = self.albumartist
|
||||
if loose_compare_lists(self.artist, self.albumartist):
|
||||
# artist & album artist are nearly identical:
|
||||
# probably guessed one of them from filename, which was lacking certain symbols of the actual artist.
|
||||
# recover whichever of these fields had the fewer characters removed (i.e. is longest)
|
||||
if len("".join(self.artist)) > len("".join(self.albumartist)):
|
||||
self.artist = self.albumartist = self.artist
|
||||
else:
|
||||
self.artist = self.albumartist = self.albumartist
|
||||
|
||||
if self.artist == []:
|
||||
self.artist = self.albumartist
|
||||
|
@ -99,11 +134,12 @@ class Tags:
|
|||
|
||||
def rewrite_singles(self) -> None:
|
||||
""" idiom is for singles to belong to self-titled album. else each artist's singles get merged into one massive album """
|
||||
if self.album == ["Singles"]:
|
||||
if self.albumartist:
|
||||
self.album = self.albumartist
|
||||
else:
|
||||
self.album = self.artist
|
||||
if len(self.album) != 1:
|
||||
return
|
||||
|
||||
for artist in self.albumartist[::-1] + self.artist[::-1]:
|
||||
if loose_compare_str(self.album[0], "Singles") or loose_compare_str(self.album[0], artist):
|
||||
self.album = [ artist ]
|
||||
|
||||
@staticmethod
|
||||
def from_path(p: str) -> 'Tags':
|
||||
|
@ -246,7 +282,10 @@ class Tagger:
|
|||
old_tags = file_.tags_on_disk()
|
||||
|
||||
path_tags = Tags.from_path(path_)
|
||||
# logger.debug(f"extracted tags from {path_}: {path_tags}")
|
||||
new_tags = old_tags.union(path_tags)
|
||||
new_tags.trim_fields()
|
||||
new_tags.expand_shorthands()
|
||||
new_tags.promote_albumartist()
|
||||
new_tags.rewrite_singles()
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user