sane-sync-music: add --compress and --compat options
This commit is contained in:
parent
ab020327f4
commit
4caf61387e
|
@ -13,56 +13,163 @@ from unidecode import unidecode
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
MAKE_MP3 = [
|
LOSSLESS_FMTS = [
|
||||||
'.flac',
|
'.flac',
|
||||||
|
'.wav',
|
||||||
|
]
|
||||||
|
|
||||||
|
MAYBE_LOSSY_FMTS = [
|
||||||
|
# WMA can be lossy or lossless
|
||||||
|
'.wma',
|
||||||
|
]
|
||||||
|
|
||||||
|
LOSSY_FMTS = [
|
||||||
|
'.aac',
|
||||||
|
'.m4a',
|
||||||
|
'.mp3',
|
||||||
'.oga',
|
'.oga',
|
||||||
'.ogg',
|
'.ogg',
|
||||||
'.opus',
|
'.opus',
|
||||||
'.wav',
|
|
||||||
'.wma',
|
|
||||||
]
|
]
|
||||||
COPY_RAW = [
|
|
||||||
'.aac',
|
COMPAT_AUDIO_FMTS = [
|
||||||
|
'.flac',
|
||||||
|
'.mp3',
|
||||||
|
'.oga',
|
||||||
|
'.ogg',
|
||||||
|
'.opus',
|
||||||
|
]
|
||||||
|
|
||||||
|
AUDIO_FMTS = LOSSLESS_FMTS + MAYBE_LOSSY_FMTS + LOSSY_FMTS
|
||||||
|
|
||||||
|
IMAGE_FMTS = [
|
||||||
'.bmp',
|
'.bmp',
|
||||||
'.gif',
|
'.gif',
|
||||||
'.jpeg',
|
'.jpeg',
|
||||||
'.jpg',
|
'.jpg',
|
||||||
'.m4a',
|
|
||||||
'.mp3',
|
|
||||||
'.png',
|
'.png',
|
||||||
]
|
]
|
||||||
IGNORE = [
|
|
||||||
'.DS_Store',
|
VIDEO_FMTS = [
|
||||||
'.avi',
|
'.avi',
|
||||||
'.cue',
|
|
||||||
'.inf',
|
|
||||||
'.log',
|
|
||||||
'.lyrics',
|
|
||||||
'.m3u',
|
|
||||||
'.mov',
|
'.mov',
|
||||||
'.mp4',
|
'.mp4',
|
||||||
'.nsf_',
|
|
||||||
'.pdf',
|
|
||||||
'.toc',
|
|
||||||
'.txt',
|
|
||||||
'.webm',
|
'.webm',
|
||||||
]
|
]
|
||||||
|
|
||||||
|
METADATA_FMTS = [
|
||||||
|
'.inf',
|
||||||
|
'.lyrics',
|
||||||
|
'.nfo',
|
||||||
|
'.pdf',
|
||||||
|
'.toc',
|
||||||
|
'.txt',
|
||||||
|
]
|
||||||
|
|
||||||
|
NON_AUDIO_FMTS = IMAGE_FMTS + VIDEO_FMTS + METADATA_FMTS
|
||||||
|
|
||||||
|
IGNORE = [
|
||||||
|
'.DS_Store',
|
||||||
|
'.cue',
|
||||||
|
'.log',
|
||||||
|
'.m3u',
|
||||||
|
'.nsf_',
|
||||||
|
]
|
||||||
|
|
||||||
|
def clean_name(path: Path) -> Path:
    '''
    transform a path into something which most filesystems/protocols can reliably support.
    also removes irregularities like uppercase file extensions.

    each component of `path` is passed through `unidecode` and then has every
    blacklisted punctuation character removed; the suffix of the resulting
    path is lowercased.

    returns the cleaned path. if nothing survives the cleaning (or `path` has
    no components) the empty relative path `Path()` is returned.
    '''
    # characters dropped from every component. constant, so defined once
    # instead of once per component.
    blacklisted = '"\'!@#$%^&*()[]{};:,<>?`~|\\/'

    out_path = Path()
    for part in path.parts:
        part = unidecode(part)
        part = ''.join(c for c in part if c not in blacklisted)
        out_path /= part

    suffix = out_path.suffix
    if not suffix:
        # nothing to normalize. returning early also covers a path whose name
        # ended up empty: `with_suffix` raises ValueError in that case.
        return out_path
    return out_path.with_suffix(suffix.lower())
|
||||||
|
|
||||||
|
class TranscodePreferences:
|
||||||
|
def __init__(self, compress: bool, compat: bool):
|
||||||
|
self.compress = compress
|
||||||
|
self.compat = compat
|
||||||
|
|
||||||
|
def get_output(self, input_ext: str) -> str | None:
|
||||||
|
"""
|
||||||
|
for some source type (e.g. `.wav`), return the desired output type (e.g. `.mp3`).
|
||||||
|
returns `.null` to indicate the file shouldn't be copied.
|
||||||
|
returns `None` if i don't understand the source file.
|
||||||
|
"""
|
||||||
|
desired_output = None
|
||||||
|
|
||||||
|
if input_ext in AUDIO_FMTS:
|
||||||
|
desired_output = input_ext
|
||||||
|
if self.compress:
|
||||||
|
desired_output = self.get_compressed_audio_output(desired_output)
|
||||||
|
if self.compat:
|
||||||
|
desired_output = self.get_compat_audio_output(desired_output)
|
||||||
|
elif input_ext in IMAGE_FMTS:
|
||||||
|
desired_output = input_ext
|
||||||
|
elif input_ext in VIDEO_FMTS:
|
||||||
|
desired_output = input_ext
|
||||||
|
elif input_ext in METADATA_FMTS:
|
||||||
|
desired_output = input_ext
|
||||||
|
elif input_ext in IGNORE:
|
||||||
|
desired_output = ".null"
|
||||||
|
|
||||||
|
return desired_output
|
||||||
|
|
||||||
|
def desired_samplerate(self, input_samplerate: int | None) -> int | None:
|
||||||
|
samplerate_map = {
|
||||||
|
192000: 48000 if self.compress else 192000,
|
||||||
|
96000: 48000 if self.compress else 96000,
|
||||||
|
88200: 44100 if self.compress else 88200,
|
||||||
|
# preserve as-is
|
||||||
|
48000: 48000,
|
||||||
|
44100: 44100,
|
||||||
|
}
|
||||||
|
return samplerate_map.get(input_samplerate)
|
||||||
|
|
||||||
|
def get_compressed_audio_output(self, input_ext: str) -> str:
|
||||||
|
if input_ext in LOSSY_FMTS:
|
||||||
|
return input_ext
|
||||||
|
else:
|
||||||
|
return ".mp3"
|
||||||
|
|
||||||
|
def get_compat_audio_output(self, input_ext: str) -> str:
|
||||||
|
if input_ext in COMPAT_AUDIO_FMTS:
|
||||||
|
return input_ext
|
||||||
|
elif input_ext in LOSSLESS_FMTS:
|
||||||
|
return ".flac"
|
||||||
|
else:
|
||||||
|
return ".mp3"
|
||||||
|
|
||||||
|
|
||||||
class Encoder:
|
class Encoder:
|
||||||
def __init__(self, dry_run: bool = False):
|
def __init__(self, prefs: TranscodePreferences, dry_run: bool = False):
|
||||||
|
self.prefs = prefs
|
||||||
self.dry_run = dry_run
|
self.dry_run = dry_run
|
||||||
|
|
||||||
def check_output_no_sideeffect(self, args: list[str]) -> bytes:
|
def _check_output(self, args: list[str], quiet: bool = False) -> bytes:
|
||||||
return subprocess.check_output(args)
|
res = subprocess.run(args, capture_output=True)
|
||||||
|
|
||||||
def check_output(self, args: list[str]) -> str:
|
stderr = res.stderr.strip()
|
||||||
if self.dry_run:
|
if stderr and not quiet:
|
||||||
|
logger.error(stderr)
|
||||||
|
|
||||||
|
res.check_returncode()
|
||||||
|
return res.stdout
|
||||||
|
|
||||||
|
def check_output(self, args: list[str], has_side_effect=True, **kwargs) -> str:
|
||||||
|
if self.dry_run and has_side_effect:
|
||||||
logger.debug("not invoking because dry run: " + ' '.join(args))
|
logger.debug("not invoking because dry run: " + ' '.join(args))
|
||||||
return b""
|
return b""
|
||||||
else:
|
else:
|
||||||
return subprocess.check_output(args)
|
return self._check_output(args, **kwargs)
|
||||||
|
|
||||||
def cp(self, source: Path, dest: Path) -> None:
|
def cp(self, source: Path, dest: Path) -> None:
|
||||||
|
logger.info(f'copying {source} -> {dest}')
|
||||||
self.check_output(['cp', str(source), str(dest)])
|
self.check_output(['cp', str(source), str(dest)])
|
||||||
|
|
||||||
def ensure_dir(self, dir: Path) -> None:
|
def ensure_dir(self, dir: Path) -> None:
|
||||||
|
@ -78,51 +185,52 @@ class Encoder:
|
||||||
else:
|
else:
|
||||||
os.remove(path)
|
os.remove(path)
|
||||||
|
|
||||||
def convert(self, source: Path, dest: Path) -> None:
|
def convert(self, source: Path, dest: Path, target_samplerate: int | None) -> None:
|
||||||
source_samplerate = None
|
assert dest.suffix == '.mp3', "conversion to a target other than mp3 not yet supported"
|
||||||
try:
|
logger.info(f'converting {source} -> {dest}')
|
||||||
source_samplerate = int(self.check_output_no_sideeffect(['soxi', '-r', str(source)]).decode("utf-8").strip())
|
|
||||||
except:
|
|
||||||
if str(source).endswith('.wma'):
|
|
||||||
logging.debug(f'unsupported extension for samplerate: {source}')
|
|
||||||
else:
|
|
||||||
logging.warning(f'unable to obtain samplerate for {source}')
|
|
||||||
|
|
||||||
samplerate_map = {
|
|
||||||
192000: 48000,
|
|
||||||
96000: 48000,
|
|
||||||
88200: 44100,
|
|
||||||
# preserve as-is
|
|
||||||
48000: 48000,
|
|
||||||
44100: 44100,
|
|
||||||
}
|
|
||||||
|
|
||||||
target_samplerate = samplerate_map.get(source_samplerate)
|
|
||||||
if source_samplerate and not target_samplerate:
|
|
||||||
logging.warning(f'unable to map source sample rate: {source_samplerate}')
|
|
||||||
samplerate_flags = ['-ar', str(target_samplerate)] if target_samplerate else []
|
samplerate_flags = ['-ar', str(target_samplerate)] if target_samplerate else []
|
||||||
|
|
||||||
self.check_output([
|
self.check_output([
|
||||||
'ffmpeg',
|
'ffmpeg',
|
||||||
|
'-loglevel', 'warning',
|
||||||
'-i', str(source),
|
'-i', str(source),
|
||||||
'-y', # overwrite output
|
|
||||||
'-codec:v', 'copy',
|
'-codec:v', 'copy',
|
||||||
'-codec:a', 'libmp3lame',
|
'-codec:a', 'libmp3lame',
|
||||||
'-qscale:a', '0'
|
'-qscale:a', '0'
|
||||||
] + samplerate_flags + [str(dest)])
|
] + samplerate_flags + [str(dest)])
|
||||||
|
|
||||||
|
def cp_or_convert(self, source: Path, dest: Path) -> None:
|
||||||
|
source_samplerate = None
|
||||||
|
if source.suffix.lower() not in NON_AUDIO_FMTS:
|
||||||
|
try:
|
||||||
|
source_samplerate = int(
|
||||||
|
self.check_output(
|
||||||
|
['soxi', '-r', str(source)],
|
||||||
|
has_side_effect=False,
|
||||||
|
quiet=True,
|
||||||
|
).decode("utf-8").strip()
|
||||||
|
)
|
||||||
|
except:
|
||||||
|
if source.suffix.lower() in ['.aac', '.m4a', '.wma']:
|
||||||
|
# sox is known to not support these formats
|
||||||
|
logging.debug(f'unsupported extension for samplerate: {source}')
|
||||||
|
else:
|
||||||
|
logging.warning(f'unable to obtain samplerate for {source}')
|
||||||
|
|
||||||
def clean_name(path: str) -> Path:
|
target_samplerate = self.prefs.desired_samplerate(source_samplerate)
|
||||||
'''
|
if source_samplerate and not target_samplerate:
|
||||||
transform a path into something which most filesystems/protocols can reliably support
|
logging.warning(f'unable to map source sample rate: {source_samplerate}')
|
||||||
'''
|
|
||||||
out_path = Path()
|
if source_samplerate != target_samplerate:
|
||||||
for part in path.parts:
|
# resampling -> convert
|
||||||
blacklisted = '"\'!@#$%^&*()[]{};:,<>?`~|\\/'
|
self.convert(source, dest, target_samplerate)
|
||||||
part = unidecode(part)
|
elif source.suffix.lower() != dest.suffix:
|
||||||
part = ''.join(c for c in part if c not in blacklisted)
|
# transcoding -> convert
|
||||||
out_path /= part
|
self.convert(source, dest, target_samplerate)
|
||||||
return out_path
|
else:
|
||||||
|
# neither resampling nor transcoding -> simple copy will suffice
|
||||||
|
self.cp(source, dest)
|
||||||
|
|
||||||
class Sync:
|
class Sync:
|
||||||
def __init__(self, encoder: Encoder, in_dir: str, out_dir: str, force_copy: bool = False):
|
def __init__(self, encoder: Encoder, in_dir: str, out_dir: str, force_copy: bool = False):
|
||||||
|
@ -131,66 +239,100 @@ class Sync:
|
||||||
self.out_dir = out_dir
|
self.out_dir = out_dir
|
||||||
self.force_copy = force_copy
|
self.force_copy = force_copy
|
||||||
|
|
||||||
def target_name(self, source_name: str) -> Path:
|
def target_name(self, source_name: Path) -> Path | None:
|
||||||
n = clean_name(source_name)
|
n = clean_name(source_name)
|
||||||
if n.suffix in MAKE_MP3:
|
output_type = self.encoder.prefs.get_output(n.suffix)
|
||||||
return Path(str(n) + '.mp3')
|
|
||||||
else:
|
|
||||||
return n
|
|
||||||
|
|
||||||
def calculate_delta(self) -> tuple[set[Path], set[Path]]:
|
if output_type is None:
|
||||||
|
logger.warning(f"skipping {source_name} because i don't recognize its filetype ({n.suffix})")
|
||||||
|
return None
|
||||||
|
|
||||||
|
if output_type == ".null":
|
||||||
|
return None
|
||||||
|
elif output_type == n.suffix:
|
||||||
|
return n
|
||||||
|
else:
|
||||||
|
return Path(str(n) + output_type)
|
||||||
|
|
||||||
|
def calculate_delta(self) -> tuple[set[Path], set[tuple[Path, Path]]]:
|
||||||
'''
|
'''
|
||||||
Returns the set of dest files which need to be deleted, followed by the files to copy
|
Returns, as a tuple:
|
||||||
|
- dest files which need to be deleted
|
||||||
|
- files to copy (in-path/out-path pairs)
|
||||||
|
|
||||||
|
all returned paths are relative to in_dir/out_dir.
|
||||||
'''
|
'''
|
||||||
in_files = { p.relative_to(self.in_dir) for p in Path(self.in_dir).rglob("*") if not p.is_dir() }
|
in_files = { p.relative_to(self.in_dir) for p in Path(self.in_dir).rglob("*") if not p.is_dir() }
|
||||||
logger.info(f'found {len(in_files)} files in source')
|
logger.info(f'found {len(in_files)} files in source')
|
||||||
|
|
||||||
|
in_out_map = ((in_f, self.target_name(in_f)) for in_f in in_files)
|
||||||
|
in_out_map = dict((in_f, out_f) for (in_f, out_f) in in_out_map if out_f is not None)
|
||||||
|
logger.info(f'recognized {len(in_files)} source files as media')
|
||||||
|
|
||||||
existing_out_files = { p.relative_to(self.out_dir) for p in Path(self.out_dir).rglob("*") if not p.is_dir() }
|
existing_out_files = { p.relative_to(self.out_dir) for p in Path(self.out_dir).rglob("*") if not p.is_dir() }
|
||||||
logger.info(f'found {len(existing_out_files)} files in dest')
|
logger.info(f'found {len(existing_out_files)} files in dest')
|
||||||
|
|
||||||
expected_out_files = { self.target_name(n) for n in in_files }
|
expected_out_files = in_out_map.values()
|
||||||
|
|
||||||
to_del = { f for f in existing_out_files if f not in expected_out_files }
|
to_del = { f for f in existing_out_files if f not in expected_out_files }
|
||||||
logger.info(f'found {len(to_del)} files to delete')
|
logger.info(f'found {len(to_del)} files to delete')
|
||||||
to_copy = { f for f in in_files if (self.force_copy or self.target_name(f) not in existing_out_files) and f.suffix not in IGNORE }
|
to_copy = { (in_f, out_f) for (in_f, out_f) in in_out_map.items() if (self.force_copy or out_f not in existing_out_files) }
|
||||||
logger.info(f'found {len(to_copy)} files to copy')
|
logger.info(f'found {len(to_copy)} files to copy')
|
||||||
|
|
||||||
return to_del, to_copy
|
return to_del, to_copy
|
||||||
|
|
||||||
def rm_dest_files(self, files: set[Path]) -> None:
|
def rm_dest_files(self, files: set[Path]) -> None:
|
||||||
|
'''
|
||||||
|
files are relative to out_dir
|
||||||
|
'''
|
||||||
for f in files:
|
for f in files:
|
||||||
logger.info(f'removing {f} because it does not exist on host')
|
logger.info(f'removing {f}')
|
||||||
self.encoder.remove(Path(self.out_dir) / f)
|
self.encoder.remove(Path(self.out_dir) / f)
|
||||||
|
|
||||||
def copy_one(self, name: Path) -> None:
|
def copy_one(self, src_name: Path, dest_name: Path) -> None:
|
||||||
source = self.in_dir / name
|
'''
|
||||||
dest = self.out_dir / self.target_name(name)
|
path names are relative to in_dir/out_dir
|
||||||
|
'''
|
||||||
|
source = Path(self.in_dir) / src_name
|
||||||
|
dest = Path(self.out_dir) / dest_name
|
||||||
|
|
||||||
self.encoder.ensure_dir(dest.parent)
|
self.encoder.ensure_dir(dest.parent)
|
||||||
|
|
||||||
if source.suffix in MAKE_MP3:
|
self.encoder.cp_or_convert(source, dest)
|
||||||
logger.debug(f'converting {source} -> {dest}')
|
|
||||||
self.encoder.convert(source, dest)
|
|
||||||
elif source.suffix in COPY_RAW:
|
|
||||||
logger.debug(f'copying {source} -> {dest}')
|
|
||||||
self.encoder.cp(source, dest)
|
|
||||||
else:
|
|
||||||
logger.warning(f"skipping {source} because I don't know what to do with that file type")
|
|
||||||
|
|
||||||
def cp_src_files(self, src_names: set[Path], jobs: int):
|
def try_invoke(self, f, *args) -> None:
|
||||||
logger.info(f'using {jobs} jobs to copy {len(src_names)} files')
|
"""
|
||||||
# Parallel(n_jobs=jobs)(delayed(copy_one)(encoder, in_dir / n, out_dir / target_name(n)) for n in src_names)
|
try to invoke `f` with the provided `args`, and log if it fails.
|
||||||
|
this overcomes the issue that background tasks which fail via Exception otherwise do so silently.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
f(*args)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"task failed: {e}")
|
||||||
|
|
||||||
|
def cp_files(self, file_pairs: set[tuple[Path, Path]], jobs: int):
|
||||||
|
logger.info(f'using {jobs} jobs to copy {len(file_pairs)} files')
|
||||||
with concurrent.futures.ThreadPoolExecutor(max_workers=jobs) as executor:
|
with concurrent.futures.ThreadPoolExecutor(max_workers=jobs) as executor:
|
||||||
for n in src_names:
|
for src_f, dest_f in file_pairs:
|
||||||
executor.submit(self.copy_one, n)
|
executor.submit(self.try_invoke, self.copy_one, src_f, dest_f)
|
||||||
|
|
||||||
|
|
||||||
def sync_all(in_dir: str, out_dir: str, jobs: int = None, dry_run: bool = False, force_copy: bool = False) -> None:
|
def sync_all(
|
||||||
encoder = Encoder(dry_run=dry_run)
|
in_dir: str,
|
||||||
|
out_dir: str,
|
||||||
|
compress: bool = False,
|
||||||
|
compat: bool = False,
|
||||||
|
force_copy: bool = False,
|
||||||
|
dry_run: bool = False,
|
||||||
|
jobs: int = None,
|
||||||
|
) -> None:
|
||||||
|
prefs = TranscodePreferences(compress=compress, compat=compat)
|
||||||
|
encoder = Encoder(prefs, dry_run=dry_run)
|
||||||
sync = Sync(encoder, in_dir, out_dir, force_copy=force_copy)
|
sync = Sync(encoder, in_dir, out_dir, force_copy=force_copy)
|
||||||
to_del, to_copy = sync.calculate_delta()
|
to_del, to_copy = sync.calculate_delta()
|
||||||
|
|
||||||
sync.rm_dest_files(to_del)
|
sync.rm_dest_files(to_del)
|
||||||
sync.cp_src_files(to_copy, jobs = jobs or multiprocessing.cpu_count())
|
sync.cp_files(to_copy, jobs = jobs or multiprocessing.cpu_count())
|
||||||
|
|
||||||
def main() -> None:
|
def main() -> None:
|
||||||
logging.basicConfig()
|
logging.basicConfig()
|
||||||
|
@ -199,6 +341,8 @@ def main() -> None:
|
||||||
parser = argparse.ArgumentParser(description="synchronize music from one directory to another, possibly compressing it")
|
parser = argparse.ArgumentParser(description="synchronize music from one directory to another, possibly compressing it")
|
||||||
parser.add_argument("src", help="source directory")
|
parser.add_argument("src", help="source directory")
|
||||||
parser.add_argument("dest", help="destination directory")
|
parser.add_argument("dest", help="destination directory")
|
||||||
|
parser.add_argument("--compress", action='store_true', help="compress audio files (to mp3)")
|
||||||
|
parser.add_argument("--compat", action='store_true', help="convert poorly supported file formats to better-supported formats (e.g. avoid wma)")
|
||||||
parser.add_argument("--jobs", help="number of cores to compress music with (default: all CPU cores)", default=None, type=int)
|
parser.add_argument("--jobs", help="number of cores to compress music with (default: all CPU cores)", default=None, type=int)
|
||||||
parser.add_argument("--dry-run", action='store_true', help="don't actually run any commands")
|
parser.add_argument("--dry-run", action='store_true', help="don't actually run any commands")
|
||||||
parser.add_argument("--verbose", action='store_true', help="more logging")
|
parser.add_argument("--verbose", action='store_true', help="more logging")
|
||||||
|
@ -212,7 +356,15 @@ def main() -> None:
|
||||||
if args.quiet:
|
if args.quiet:
|
||||||
logger.setLevel(logging.WARN)
|
logger.setLevel(logging.WARN)
|
||||||
|
|
||||||
sync_all(args.src, args.dest, args.jobs, args.dry_run, args.force_copy)
|
sync_all(
|
||||||
|
args.src,
|
||||||
|
args.dest,
|
||||||
|
compress=args.compress,
|
||||||
|
compat=args.compat,
|
||||||
|
force_copy=args.force_copy,
|
||||||
|
dry_run=args.dry_run,
|
||||||
|
jobs=args.jobs,
|
||||||
|
)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
|
Loading…
Reference in New Issue
Block a user