488 lines
18 KiB
Plaintext
Executable File
488 lines
18 KiB
Plaintext
Executable File
#!/usr/bin/env nix-shell
|
|
#!nix-shell -i python3 -p "python3.withPackages (ps: [ ps.mutagen ])"
|
|
#
|
|
# mutagen docs:
|
|
# - <https://mutagen.readthedocs.io/en/latest/>
|
|
"""
|
|
tool which runs over a complete music library or a subset of it and:
|
|
- detect tags which are missing or likely incorrect
|
|
- write new tags to existing media
|
|
- new tags are specified manually (--artist, --album, ...)
|
|
- OR determined via file path
|
|
|
|
the fix-tags operation does NOT move or rename files; it only edits tags. the fix-paths operation moves files to the path implied by their tags.
|
|
|
|
USAGE: cd MUSIC_LIBRARY_TOP && sane-tag-music [options] fix-tags|fix-paths [more-options] DIRECTORY [DIRECTORY ...]
|
|
|
|
scans DIRECTORY and guesses artist/album/title for each track, based on path relative to pwd.
|
|
if the guessed tags look more correct than the existing tags (i.e. if the existing file is missing a tag),
|
|
then this updates the tags on-disk to reflect their path.
|
|
|
|
DIRECTORY: specify `.` to scan the entire library.
|
|
|
|
options:
|
|
--dry-run: only show what would be done, don't actually do it.
|
|
--verbose
|
|
--force: apply path-based tag to each file, even those which already have tags (only for fix-tags)
|
|
--album ALBUM manually specify the tag, rather than guessing from path.
|
|
--album-artist ARTIST often combined with DIRECTORY to tag an entire artist or album.
|
|
--artist ARTIST
|
|
--title TITLE
|
|
"""
|
|
|
|
from collections.abc import Iterator
from dataclasses import dataclass

import argparse
import logging
import os
import os.path

import mutagen.easyid3
import mutagen.flac
import mutagen.mp3
import mutagen.oggopus
import mutagen.oggvorbis
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
def clean_for_loose_compare(a: str) -> str:
    """
    reduce a tag value to a canonical form for fuzzy equality checks.

    the value is stripped, lowercased, and loses a leading "the ".
    punctuation that commonly can't survive in a file name is dropped, and then
    everything outside [a-z0-9] is dropped too -- unless that would discard more
    than half of the remaining characters, in which case only the punctuation is dropped.
    """
    normalized = a.strip().lower().removeprefix("the ")

    # goal is to help merge path-extracted tags with embedded tags:
    # tags often carry rich characters which a file name can't represent.
    punctuation = '. &()[];:'
    plain = 'abcdefghijklmnopqrstuvwxyz0123456789'

    without_punctuation = "".join(filter(lambda ch: ch not in punctuation, normalized))
    plain_only = "".join(ch for ch in without_punctuation if ch in plain)

    if 2 * len(plain_only) < len(without_punctuation):
        # a *suspicious* amount was removed: likely a non-english album/artist/track,
        # so keep the rich characters rather than comparing near-empty strings.
        return without_punctuation
    return plain_only
|
|
|
|
def loose_compare_str(a: str, b: str) -> bool:
    """ true when both strings reduce to the same form under clean_for_loose_compare """
    cleaned_a, cleaned_b = clean_for_loose_compare(a), clean_for_loose_compare(b)
    return cleaned_a == cleaned_b
|
|
|
|
def loose_compare_lists(a: list[str], b: list[str]) -> bool:
    """
    true when both lists hold the same entries after each entry is passed
    through clean_for_loose_compare. order is ignored; duplicates are not.
    """
    cleaned_a = list(map(clean_for_loose_compare, a))
    cleaned_b = list(map(clean_for_loose_compare, b))
    cleaned_a.sort()
    cleaned_b.sort()
    return cleaned_a == cleaned_b
|
|
|
|
def clean_for_fs(a: str, single_field: bool=False) -> str:
    """
    turn a tag value into something safe to use as one path component.

    spaces become dots; with `single_field`, dashes become dots as well.
    any character outside [A-Za-z0-9._-] is dropped (case is kept),
    and runs of dots are collapsed into a single dot.
    """
    preserve = 'abcdefghijklmnopqrstuvwxyz0123456789._-'

    dotted = a
    for separator in (" -" if single_field else " "):
        dotted = dotted.replace(separator, ".")

    collapsed = []
    for ch in dotted:
        if ch.lower() not in preserve:
            continue
        if ch == "." and collapsed and collapsed[-1] == ".":
            # already emitted a dot: don't let dots pile up
            continue
        collapsed.append(ch)
    return "".join(collapsed)
|
|
|
|
@dataclass
|
|
class Tags:
|
|
# format matches mutagen's
|
|
# these tags could be technically valid, but semantically invalid
|
|
# e.g. a tracknumber that's not a number
|
|
artist: list[str]
|
|
album: list[str]
|
|
title: list[str]
|
|
albumartist: list[str]
|
|
tracknumber: list[str]
|
|
|
|
def __init__(
|
|
self,
|
|
artist: list[str] = None,
|
|
album: list[str] = None,
|
|
title: list[str] = None,
|
|
albumartist: list[str] = None,
|
|
tracknumber: list[str] = None,
|
|
):
|
|
self.artist = artist or []
|
|
self.album = album or []
|
|
self.title = title or []
|
|
self.albumartist = albumartist or []
|
|
self.tracknumber = tracknumber or []
|
|
|
|
|
|
def __repr__(self) -> str:
|
|
return f"artist:{self.artist}/{self.albumartist}, album:{self.album}, title:{self.title}, trackno:{self.tracknumber}"
|
|
|
|
def union(self, fallback: 'Tags') -> 'Tags':
|
|
def merge_field(primary: list[str], secondary: list[str]) -> list[str]:
|
|
# primary_lower = [i.lower() for i in primary]
|
|
# return primary + [i for i in secondary if i.lower() not in primary_lower]
|
|
return primary or secondary
|
|
|
|
|
|
artist=merge_field(self.artist, fallback.artist)
|
|
album=merge_field(self.album, fallback.album)
|
|
title=merge_field(self.title, fallback.title)
|
|
albumartist=merge_field(self.albumartist, fallback.albumartist)
|
|
tracknumber=merge_field(self.tracknumber, fallback.tracknumber)
|
|
|
|
if artist == albumartist:
|
|
# if extraneous, then keep the album artist whatever it originally was
|
|
albumartist = self.albumartist
|
|
|
|
return Tags(
|
|
artist=artist,
|
|
album=album,
|
|
title=title,
|
|
albumartist=albumartist,
|
|
tracknumber=tracknumber,
|
|
)
|
|
|
|
def trim_fields(self) -> None:
|
|
if len(self.title) == 1:
|
|
self.title = [ self.title[0].strip() ]
|
|
if len(self.artist) == 1:
|
|
self.artist = [ self.artist[0].strip() ]
|
|
if len(self.albumartist) == 1:
|
|
self.albumartist = [ self.albumartist[0].strip() ]
|
|
if len(self.album) == 1:
|
|
self.album = [ self.album[0].strip() ]
|
|
|
|
def expand_shorthands(self) -> None:
|
|
va = ["V.A.", "Various"]
|
|
|
|
for i, a in enumerate(self.artist):
|
|
if a in va:
|
|
self.artist[i] = "Various Artists"
|
|
|
|
for i, a in enumerate(self.albumartist):
|
|
if a in va:
|
|
self.albumartist[i] = "Various Artists"
|
|
|
|
def promote_albumartist(self) -> None:
|
|
"""
|
|
1. replace shorthands like "V.A." with "Various Artists".
|
|
2. if there's only an album artist, and no track artist, turn the album artist into the track artist.
|
|
3. if the artist and album artist are nearly identical, try to merge them.
|
|
"""
|
|
|
|
if loose_compare_lists(self.artist, self.albumartist):
|
|
# arist & album artist are nearly identical:
|
|
# probably guessed one of them from filename, which was lacking certain symbols of the actual artist.
|
|
# recover whichever of these fields had the fewer characters removed (i.e. is longest)
|
|
if len("".join(self.artist)) > len("".join(self.albumartist)):
|
|
self.artist = self.albumartist = self.artist
|
|
else:
|
|
self.artist = self.albumartist = self.albumartist
|
|
|
|
if self.artist == []:
|
|
self.artist = self.albumartist
|
|
self.albumartist = []
|
|
|
|
def rewrite_singles(self) -> None:
|
|
""" idiom is for singles to belong to self-titled album. else each artist's singles get merged into one massive album """
|
|
if len(self.album) != 1:
|
|
return
|
|
|
|
for artist in self.albumartist[::-1] + self.artist[::-1]:
|
|
if loose_compare_str(self.album[0], "Singles") or loose_compare_str(self.album[0], artist):
|
|
self.album = [ artist ]
|
|
|
|
def to_path(self, ext: str) -> str | None:
|
|
artist = self.albumartist or self.artist
|
|
if not (artist and self.album and self.tracknumber and self.title and ext):
|
|
return None
|
|
|
|
artist = clean_for_fs(artist[0], single_field=False)
|
|
album = clean_for_fs(self.album[0], single_field=True)
|
|
trackno = clean_for_fs(self.tracknumber[0], single_field=True)
|
|
title_ext = clean_for_fs(self.title[0] + f".{ext}", single_field=True)
|
|
return f"{artist}/{album}/{trackno}-{title_ext}"
|
|
|
|
@staticmethod
|
|
def from_path(p: str) -> 'Tags':
|
|
"""
|
|
path cases:
|
|
- artist/album/track
|
|
- label/artist - album/track (in this case "label" is ignored)
|
|
track naming:
|
|
- could have many fields. the title will always be last. trackno could be embedded or not.
|
|
- title (handled)
|
|
- artist - track (handled)
|
|
- album - track (handled)
|
|
- trackno - track (handled)
|
|
- trackno - artist - track (handled)
|
|
- album - artist - title (for Various Artists/compilation albums) (handled)
|
|
- artist - album - trackno title (not handled)
|
|
track numbering:
|
|
- 01, 02, ...
|
|
- 1-01, 1-02, ... 2-01, 2-02, ... (for A-side/B-side)
|
|
additionally, clean the path before this logic:
|
|
- ./artist/album/track -> artist/album/track
|
|
"""
|
|
tags = Tags()
|
|
def parse_trackno(trackno: str) -> None:
|
|
tags.tracknumber = [trackno.lstrip('0')]
|
|
|
|
def parse_title(title: str) -> None:
|
|
new_title = title
|
|
# maybe the filename has some identifier (e.g. soundcloud): remove it
|
|
while new_title and new_title[-1] in '0123456789':
|
|
new_title = new_title[:-1]
|
|
if new_title and new_title[-1] == '-':
|
|
new_title = new_title[:-1]
|
|
if len(title) - len(new_title) < 5:
|
|
# we stripped too little, probably not an identifier. undo it.
|
|
new_title = title
|
|
tags.title = [ new_title ]
|
|
|
|
def parse_track(track: str) -> None:
|
|
track = os.path.splitext(track)[0]
|
|
track_parts = [p.strip() for p in track.split(' - ')]
|
|
if len(track_parts) == 1:
|
|
parse_title(track)
|
|
elif len(track_parts) == 2:
|
|
if tags.albumartist and loose_compare_str(track_parts[0], tags.albumartist[0]):
|
|
parse_title(track_parts[1])
|
|
elif tags.album and loose_compare_str(track_parts[0], tags.album[0]):
|
|
# less common, but sometimes `album - track`
|
|
parse_title(track_parts[1])
|
|
elif all(l in '0123456789-' for l in track_parts[0]):
|
|
parse_trackno(track_parts[0])
|
|
parse_title(track_parts[1])
|
|
elif len(track_parts) == 3:
|
|
if all(l in '0123456789-' for l in track_parts[0]):
|
|
parse_trackno(track_parts[0])
|
|
tags.artist = [track_parts[1]] # explicitly not album artist, but track artist
|
|
parse_title(track_parts[2])
|
|
elif tags.album == [ track_parts[0] ]:
|
|
tags.artist = [track_parts[1]]
|
|
parse_title(track_parts[2])
|
|
|
|
def parse_album(album: str) -> None:
|
|
album_parts = [p.strip() for p in album.split(' - ')]
|
|
if len(album_parts) == 1:
|
|
# artist/album/track
|
|
tags.album = [album]
|
|
elif len(album_parts) == 2:
|
|
# artist/artist-album/track
|
|
tags.albumartist = [album_parts[0]]
|
|
tags.album = [album_parts[1]]
|
|
|
|
comps = [c for c in p.split('/') if c != '.']
|
|
|
|
if len(comps) == 3:
|
|
tags.albumartist = [comps[0]]
|
|
parse_album(comps[1])
|
|
parse_track(comps[2])
|
|
elif len(comps) == 2:
|
|
tags.albumartist = [comps[0]]
|
|
parse_track(comps[1])
|
|
|
|
return tags
|
|
|
|
|
|
class AudioFile:
    """
    a music file on disk, plus the mutagen object used to read & write its tags.

    `muta` is None when the file's extension has no handler, or when opening the file failed.
    """
    def __init__(self, path_: str):
        self.path_ = path_
        self.muta = None

        _base, ext = os.path.splitext(path_)
        # compare case-insensitively, so that `TRACK.MP3` is handled just like `track.mp3`
        ext = ext.lower()
        try:
            # TODO: handle:
            # - .m4a
            # - .wav
            # - .wma
            if ext == '.flac':
                self.muta = mutagen.flac.Open(path_)
            elif ext == '.aac':
                # TODO: this seems to only read tags, and not create them?
                self.muta = mutagen.easyid3.EasyID3(path_)
            elif ext == '.mp3':
                self.muta = mutagen.mp3.EasyMP3(path_)
            elif ext == '.ogg':
                self.muta = mutagen.oggvorbis.OggVorbis(path_)
            elif ext == '.opus':
                self.muta = mutagen.oggopus.OggOpus(path_)
            else:
                logger.debug(f"no metadata handler for {path_}")
        except Exception as e:
            # best-effort: one unreadable file shouldn't abort a scan of the whole library
            logger.warning(f"failed to open {path_}: {e}")

    @staticmethod
    def new(path_: str) -> 'AudioFile | None':
        """ open `path_`, returning None if it's unsupported or couldn't be opened """
        f = AudioFile(path_)
        if f.muta is not None:
            return f
        return None

    def tags_on_disk(self) -> Tags:
        """ the tags currently stored in the file; every field is empty if there's no metadata to read """
        def field(name: str) -> list:
            return self.muta.get(name, []) if self.muta else []

        return Tags(
            artist=field('artist'),
            album=field('album'),
            title=field('title'),
            albumartist=field('albumartist'),
            tracknumber=field('tracknumber'),
        )

    def write_tags(self, tags: Tags) -> bool:
        """
        replace the file's artist/album/title/albumartist/tracknumber with `tags` and save.
        a field which is empty in `tags` is deleted from the file.

        returns True once saved, or False if there's no metadata handler for this file.
        """
        if self.muta is None:
            logger.debug(f"not writing tags: no metadata handler: {self.path_}")
            return False

        def set_tag(name: str, val: list) -> None:
            if val:
                self.muta[name] = val
            elif name in self.muta:
                del self.muta[name]

        set_tag('artist', tags.artist)
        set_tag('album', tags.album)
        set_tag('title', tags.title)
        set_tag('albumartist', tags.albumartist)
        set_tag('tracknumber', tags.tracknumber)

        logger.debug(f"writing full tags: {self.muta}")

        self.muta.save()
        return True
|
|
|
|
class Tagger:
    """
    applies the `fix-tags` and `fix-paths` operations to individual files.

    dry_run: report what would be written/moved, without touching anything.
    force: for tag_file, prefer manual/path-derived tags over the tags already in the file.
    manual_tags: tags given on the command line.
    """
    def __init__(self, dry_run: bool, force: bool, manual_tags: Tags):
        self.dry_run = dry_run
        self.force = force
        self.manual_tags = manual_tags

    def tag_file(self, path_: str) -> None:
        """ merge existing, manual and path-derived tags for `path_`, and write them back if anything changed """
        file_ = AudioFile.new(path_)
        if not file_:
            logger.debug(f"skipping unsupported file: {path_}")
            return

        old_tags = file_.tags_on_disk()
        path_tags = Tags.from_path(path_)
        # manual tags take precedence over anything guessed from the path
        additional_tags = self.manual_tags.union(path_tags)
        if self.force:
            new_tags = additional_tags.union(old_tags)
        else:
            new_tags = old_tags.union(additional_tags)
        # NOTE(review): manual tags are already the primary source of `additional_tags`,
        # so this second union looks like it can't add anything -- confirm the intent.
        new_tags = new_tags.union(self.manual_tags)
        new_tags.trim_fields()
        new_tags.expand_shorthands()
        new_tags.promote_albumartist()
        new_tags.rewrite_singles()

        if new_tags == old_tags:
            return self.skip_unchanged(path_, old_tags)

        self.show_tagdif(path_, old_tags, new_tags)

        if self.confirm():
            if self.guard_dry_run("writing tags"):
                file_.write_tags(new_tags)

    def fix_path(self, path_: str) -> None:
        """
        move `path_` to the location implied by its tags (manual tags first, then the tags on disk).

        files lacking the tags needed to build a path are skipped,
        and an existing destination is never overwritten.
        """
        file_ = AudioFile.new(path_)
        if not file_:
            logger.debug(f"skipping unsupported file: {path_}")
            return

        tags = self.manual_tags.union(file_.tags_on_disk())
        new_path = tags.to_path(os.path.splitext(path_)[1])
        if new_path is None:
            logger.debug(f"skipping untagged file: {path_}")
            logger.debug(f" {tags}")
            return

        # compare absolute paths: walking `.` yields `./artist/...` whereas to_path yields `artist/...`
        if os.path.abspath(new_path) == os.path.abspath(path_):
            return self.skip_unchanged(path_, tags)

        if os.path.lexists(new_path):
            # renaming would replace the existing file (e.g. two tracks tagged identically): refuse.
            logger.warning(f"not moving {path_}: destination already exists: {new_path}")
            return

        if self.confirm():
            if self.guard_dry_run(f"moving file: {path_} -> {new_path}"):
                # os.renames creates the necessary parents, and then prunes leaf directories
                os.renames(path_, new_path)

    def show_tagdif(self, path_: str, old_tags: Tags, new_tags: Tags) -> None:
        """ log the tags before and after, for a file about to be updated """
        logger.info(f"updating tags for {path_}")
        logger.info(f" {old_tags}")
        logger.info(f" -> {new_tags}")

    def skip_unchanged(self, path_: str, tags: Tags) -> None:
        """ log (at debug level) that `path_` needs no change """
        logger.debug(f"skipping unchanged {path_}")
        logger.debug(f" {tags}")

    def confirm(self) -> bool:
        """ whether to go ahead with a change; currently always True """
        # TODO: actually prompt
        return True

    def guard_dry_run(self, msg: str) -> bool:
        """ returns True if the action described by `msg` should really happen; in dry-run mode, prints a notice and returns False """
        if self.dry_run:
            print(f"dry run: not {msg}")
            return False

        return True
|
|
|
|
|
|
def walk_files(*roots: str) -> Iterator[str]:
    """
    yield the path of every file beneath each of `roots`.

    a root which is a directory is walked recursively, in sorted order so that
    repeated runs visit files in the same sequence.
    a root which is not a directory is yielded as-is, without checking that it exists.
    """
    for root in roots:
        if not os.path.isdir(root):
            yield root
            continue

        for dir_, subdirs, files_ in os.walk(root):
            # sorting `subdirs` in place steers the order in which os.walk descends
            subdirs.sort()
            for f in sorted(files_):
                yield os.path.join(dir_, f)
|
|
|
|
|
|
def main():
    """ command-line entry point: parse arguments, collect the files to visit, then run fix-tags or fix-paths on each """
    logging.basicConfig()
    logging.getLogger().setLevel(logging.INFO)

    parser = argparse.ArgumentParser(description="augment music tags based on library path")
    parser.add_argument('--dry-run', action='store_true')
    parser.add_argument('--verbose', action='store_true')
    parser.add_argument('--album', help="manually specify the tag")
    parser.add_argument('--album-artist', help="manually specify the tag")
    parser.add_argument('--artist', help="manually specify the tag")
    parser.add_argument('--title', help="manually specify the tag")

    # required: without an operation there's neither `path` nor `subcommand` to act on,
    # so have argparse print a usage error rather than crash later on a missing attribute.
    subparsers = parser.add_subparsers(dest="operation", required=True, help="operation")

    fix_tags_parser = subparsers.add_parser("fix-tags")
    fix_tags_parser.set_defaults(subcommand="fix_tags")
    fix_tags_parser.add_argument('--force', action='store_true', help="give higher credence to path-based and manual tags than any existing tags")
    fix_tags_parser.add_argument("path", nargs="+", help="relative path to a file to tag")

    fix_paths_parser = subparsers.add_parser("fix-paths")
    fix_paths_parser.set_defaults(subcommand="fix_paths")
    fix_paths_parser.add_argument("path", nargs="+", help="relative path to a file to tag")

    args = parser.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # collect everything up front: fix-paths moves files, which shouldn't happen mid-walk
    files = list(walk_files(*args.path))
    manual_tags = Tags(
        album=[args.album] if args.album else [],
        albumartist=[args.album_artist] if args.album_artist else [],
        artist=[args.artist] if args.artist else [],
        title=[args.title] if args.title else [],
    )

    tagger = Tagger(
        dry_run=args.dry_run,
        # --force only exists on the fix-tags subcommand
        force=getattr(args, "force", False),
        manual_tags=manual_tags,
    )

    if args.subcommand == "fix_tags":
        for p in files:
            tagger.tag_file(p)
    elif args.subcommand == "fix_paths":
        for p in files:
            print(p)
            tagger.fix_path(p)
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
main()
|