# Patch against pkgs/additional/sane-scripts/src/sane-tag-music (unified git diff).
# Text before the "diff --git" header is ignored by patch tools; these notes are for readers only.
#
# What the hunks below do:
#  * add module-level helpers clean_for_loose_compare / loose_compare_str / loose_compare_lists.
#    clean_for_loose_compare strips + lowercases the string, drops a leading "the ", removes the
#    characters in `omitable`, and then keeps only [a-z0-9] -- unless that would leave fewer than
#    half of the remaining characters, in which case the un-filtered string is returned.
#  * add Tags.trim_fields (strips whitespace from title/artist/albumartist/album when the field
#    holds exactly one value) and Tags.expand_shorthands ("V.A." -> "Various Artists").
#  * Tags.promote_albumartist: the "V.A." handling is removed (now in expand_shorthands) and the
#    inline character filtering is replaced by loose_compare_lists.
#  * Tags.rewrite_singles: now returns early unless there is exactly one album value, and rewrites
#    the album to an artist name when the album loosely equals "Singles" or that artist.
#  * Tagger hunk: calls trim_fields() and expand_shorthands() before promote_albumartist().
#
# NOTE(review): this copy of the patch has had its line breaks collapsed; the payload is left
#   untouched because the indentation of context/removed lines cannot be recovered from here.
# NOTE(review): the promote_albumartist docstring (context line) still lists the "V.A." shorthand
#   replacement as step 1 although this patch moves it to expand_shorthands -- looks stale, confirm.
# NOTE(review): rewrite_singles assigns self.album inside the loop and later iterations compare
#   against the updated value -- confirm which artist is meant to win when several are present.
diff --git a/pkgs/additional/sane-scripts/src/sane-tag-music b/pkgs/additional/sane-scripts/src/sane-tag-music index e0d332d7..15aefd0c 100755 --- a/pkgs/additional/sane-scripts/src/sane-tag-music +++ b/pkgs/additional/sane-scripts/src/sane-tag-music @@ -14,6 +14,34 @@ import mutagen.easyid3 logger = logging.getLogger(__name__) +def clean_for_loose_compare(a: str) -> str: + a = a.strip().lower() + if a.startswith("the "): + a = a[len("the "):] + + # goal is to help merge path-extracted tags with embedded tags. + # it's common for a tag to have some rich characters which can't be represented in a file. + # so just remove rich characters, but in a way which doesn't become useless when faced with primarily non-latin names + omitable = '. &()[];:' + unomitable = 'abcdefghijklmnopqrstuvwxyz0123456789' + + a = "".join(c for c in a if c not in omitable) + cleaned = "".join(c for c in a if c in unomitable) + if len(cleaned) >= 0.5 * len(a): + return cleaned + else: + # we cleaned a *suspicious* amount, probably erroneously. 
+ # likely a non-english album/artist/track + return a + +def loose_compare_str(a: str, b: str) -> bool: + return clean_for_loose_compare(a) == clean_for_loose_compare(b) + +def loose_compare_lists(a: list[str], b: list[str]) -> bool: + a = sorted(clean_for_loose_compare(i) for i in a) + b = sorted(clean_for_loose_compare(i) for i in b) + return a == b + @dataclass class Tags: # format matches mutagen's @@ -68,6 +96,22 @@ class Tags: tracknumber=tracknumber, ) + def trim_fields(self) -> None: + if len(self.title) == 1: + self.title = [ self.title[0].strip() ] + if len(self.artist) == 1: + self.artist = [ self.artist[0].strip() ] + if len(self.albumartist) == 1: + self.albumartist = [ self.albumartist[0].strip() ] + if len(self.album) == 1: + self.album = [ self.album[0].strip() ] + + def expand_shorthands(self) -> None: + if self.artist == ["V.A."]: + self.artist = ["Various Artists"] + if self.albumartist == ["V.A."]: + self.albumartist = ["Various Artists"] + def promote_albumartist(self) -> None: """ 1. replace shorthands like "V.A." with "Various Artists". @@ -75,23 +119,14 @@ class Tags: 3. if the artist and album artist are nearly identical, try to merge them. """ - if self.artist == ["V.A."]: - self.artist = ["Various Artists"] - if self.albumartist == ["V.A."]: - self.albumartist = ["Various Artists"] - - unomitable = 'abcdefghijklmnopqrstuvwxyz0123456789' - if len(self.artist) == len(self.albumartist) == 1: - filtered_artist = [i for i in self.artist[0] if i.lower() in unomitable] - filtered_albumartist = [i for i in self.albumartist[0] if i.lower() in unomitable] - if filtered_artist == filtered_albumartist: - # arist & album artist are nearly identical: - # probably guessed one of them from filename, which was lacking certain symbols of the actual artist. - # recover whichever of these fields had the fewer characters removed (i.e. 
is longest) - if len(self.artist[0]) > len(self.albumartist[0]): - self.artist = self.albumartist = self.artist - else: - self.artist = self.albumartist = self.albumartist + if loose_compare_lists(self.artist, self.albumartist): + # artist & album artist are nearly identical: + # probably guessed one of them from filename, which was lacking certain symbols of the actual artist. + # recover whichever of these fields had the fewer characters removed (i.e. is longest) + if len("".join(self.artist)) > len("".join(self.albumartist)): + self.artist = self.albumartist = self.artist + else: + self.artist = self.albumartist = self.albumartist if self.artist == []: self.artist = self.albumartist @@ -99,11 +134,12 @@ class Tags: def rewrite_singles(self) -> None: """ idiom is for singles to belong to self-titled album. else each artist's singles get merged into one massive album """ - if self.album == ["Singles"]: - if self.albumartist: - self.album = self.albumartist - else: - self.album = self.artist + if len(self.album) != 1: + return + + for artist in self.albumartist[::-1] + self.artist[::-1]: + if loose_compare_str(self.album[0], "Singles") or loose_compare_str(self.album[0], artist): + self.album = [ artist ] @staticmethod def from_path(p: str) -> 'Tags': @@ -246,7 +282,10 @@ class Tagger: old_tags = file_.tags_on_disk() path_tags = Tags.from_path(path_) + # logger.debug(f"extracted tags from {path_}: {path_tags}") new_tags = old_tags.union(path_tags) + new_tags.trim_fields() + new_tags.expand_shorthands() new_tags.promote_albumartist() new_tags.rewrite_singles()