From 6f137fd50d68dae0148e02785807d95a71eed2ee Mon Sep 17 00:00:00 2001 From: Luca Bilke Date: Thu, 29 Jun 2023 23:57:07 +0200 Subject: [PATCH] lots more changes --- TODO.md | 14 +--- conf.toml | 1 - tidal-scraper/download.py | 143 +++++++++++++++++++++----------------- tidal-scraper/helper.py | 31 +++++++-- tidal-scraper/metadata.py | 13 ++-- tidal-scraper/run.py | 13 ++-- tidal-scraper/state.py | 48 ++++++++----- 7 files changed, 149 insertions(+), 114 deletions(-) diff --git a/TODO.md b/TODO.md index 388215d..1138ed9 100644 --- a/TODO.md +++ b/TODO.md @@ -1,12 +1,4 @@ -- [ ] allow downloads of lists of albums, artists and tracks, and set their downloaded status -- [ ] write all metadata from tidal to downloaded tracks - -- [ ] get lists of favorited albums, artists and tracks -- [ ] allow export of these lists (with names, IDs and downloaded status) -- [ ] allow import of these lists - -- [ ] allow configuration with a toml file -- [ ] allow configuration of download path -- [ ] write log of errors - - [ ] installer or pip package +- [ ] installer should create state and config homes if not existing +- [ ] proper SIGTERM handling +- [ ] decrypt and write in chunks diff --git a/conf.toml b/conf.toml index acc34c2..fab14cb 100644 --- a/conf.toml +++ b/conf.toml @@ -7,7 +7,6 @@ quality = "lossless" user_id = dest_dir = "./downloads/" -state_dir = "./state/" # The following templates are passed an artist, album and track object. # Possible attributes can be found here: https://tidalapi.netlify.app/api.html # The artist is derived from the album a track is in rather than the track itself. diff --git a/tidal-scraper/download.py b/tidal-scraper/download.py index 1f7942a..4e59683 100644 --- a/tidal-scraper/download.py +++ b/tidal-scraper/download.py @@ -1,5 +1,5 @@ import metadata -from helper import CONF, EXTENSIONS, clean_template, log_error +from helper import conf, extensions, clean_template, log_error import tidalapi import os @@ -36,103 +36,118 @@ def __decode_key_id(key_id: str) -> Tuple[bytes, bytes]: def __decrypt_file(fp: BinaryIO, key: bytes, nonce: bytes) -> None: counter = Counter.new(64, prefix=nonce, initial_value=0) decryptor = AES.new(key, AES.MODE_CTR, counter=counter) - data = decryptor.decrypt(fp) + fp.seek(0) + data = fp.read() + data = decryptor.decrypt(data) fp.write(data) -def __download_file(url: str, fp: BinaryIO) -> None: - r = requests.get(url, stream=True) - r.raise_for_status() - total_bytes = int(r.headers.get("content-length", 0)) - progress = tqdm(total=total_bytes, unit="iB", unit_scale=True) - for data in r.iter_content(1024): - fp.write(data) - progress.update(len(data)) - progress.close() +def __download_file(url: str, fp: BinaryIO) -> str: + with requests.get(url, stream=True) as r: + if conf["debug"]: + print(r.headers) + r.raise_for_status() + mime = r.headers.get("Content-Type", "") + total_bytes = int(r.headers.get("Content-Length", 0)) + with tqdm(total=total_bytes, unit="iB", unit_scale=True) as p: + for data in r.iter_content(1024): + fp.write(data) + p.update(len(data)) + return mime -def download_track(track: tidalapi.Track, dest_path: str) -> None: +def download_track(track: tidalapi.Track, dest: str) -> None: album = track.album assert album - dest_path += clean_template(CONF["track_name"], track=track) - - try: - stream = track.stream() - manifest = json.loads(b64decode(stream.manifest)) - print(manifest) - url = manifest["urls"][0] - for ext in EXTENSIONS: - if ext in url and ext is not ".mp4": - dest_path += ext - elif ".mp4" in url: - if "ac4" in stream.codec or "mha1" in stream.codec: - dest_path += ".mp4" + print(f"Starting {album.artist.name} - {track.name}") + dest += clean_template(conf["track_name"], track=track) + http_failures = 0 + while http_failures <= 3: + try: + stream = track.stream() + manifest = json.loads(b64decode(stream.manifest)) + if conf["debug"]: + print(manifest) + url = manifest["urls"][0] + codec = manifest["codecs"] + if ".mp4" in url: + if "ac4" in codec or "mha1" in codec: + dest += ".mp4" else: - dest_path += ".m4a" - if os.path.exists(dest_path + ext) and CONF["skip_downloaded"]: - print(f"Skipping {album.artist.name} - {track.name}") + dest += ".m4a" + else: + for ext in (x for x in extensions if x != ".mp4"): + dest += ext + if os.path.exists(dest) and conf["skip_downloaded"]: + print(f"Skipping track") return - assert track.name and album.name - os.makedirs(os.path.dirname(dest_path), exist_ok=True) - with io.BytesIO() as b: - print(f"Downloading {album.artist.name} - {track.name}") - key_id = manifest.get("keyId", None) - __download_file(url, b) - - if key_id: - __decrypt_file(b, *__decode_key_id(key_id)) - metadata.write( - b, - manifest["codecs"], - track.name, - album.name, - str(track.track_num), - str(album.num_tracks), + assert track.name and album.name + with io.BytesIO() as b: + print(f"Downloading track") + key_id = manifest.get("keyId", None) + mime = __download_file(url, b) + if key_id: + print(f"Decrypting track") + __decrypt_file(b, *__decode_key_id(key_id)) + metadata.write( + b, + mime, + track.name, + album.name, + str(track.track_num), + str(album.num_tracks), + ) + with open(dest, "wb") as f: + data = b.getvalue() + f.write(data) + print() + break + except requests.HTTPError: + http_failures += 1 + except: + log_error( + "Failure while downloading {artist} - {track}", + artist=album.artist.name, + track=track.name, ) - with open(dest_path, "wb") as f: - data = b.read() - f.write(data) - except: - log_error( - "Failure while downloading {artist} - {track}", - artist=album.artist.name, - track=track.name, - ) + break def download_cover( - obj: tidalapi.Album | tidalapi.Playlist, dest_path: str, size: int + obj: tidalapi.Album | tidalapi.Playlist, dest: str, size: int ) -> None: - if os.path.exists(dest_path) and CONF["skip_downloaded"]: + if os.path.exists(dest) and conf["skip_downloaded"]: return url = obj.image(size) - with open(dest_path, "wb") as f: + with open(dest, "wb") as f: __download_file(url, f) def download_album(album: tidalapi.Album) -> None: - dest_path = clean_template( - CONF["dest_dir"] + CONF["album_dir"], + dest = clean_template( + conf["dest_dir"] + "/" + conf["album_dir"], album=album, artist=album.artist, ) - download_cover(album, dest_path, CONF["album_image_size"]) + os.makedirs(os.path.dirname(dest), exist_ok=True) + download_cover(album, dest, conf["album_image_size"]) tracks = album.tracks() for track in tracks: - download_track(track, dest_path) + download_track(track, dest) def download_playlist(playlist: tidalapi.Playlist) -> None: - dest_path = clean_template( - CONF["dest_dir"] + CONF["playlist_dir"], + dest = clean_template( + conf["dest_dir"] + "/" + conf["playlist_dir"], playlist=playlist, ) - download_cover(playlist, dest_path, CONF["playlist_image_size"]) + os.makedirs(os.path.dirname(dest), exist_ok=True) + download_cover(playlist, dest, conf["playlist_image_size"]) tracks = playlist.tracks() for track in tracks: - download_track(track, dest_path) + download_track(track, dest) def download_artist(artist: tidalapi.Artist) -> None: diff --git a/tidal-scraper/helper.py b/tidal-scraper/helper.py index 16c8de5..0cc1f28 100644 --- a/tidal-scraper/helper.py +++ b/tidal-scraper/helper.py @@ -1,20 +1,37 @@ import re +import os import tomllib import sys import traceback -with open("../config.toml", "rb") as conf: - CONF = tomllib.load(conf) +extensions = [".flac", ".mp4", ".m4a", ""] + +home = os.getenv("HOME") +state_dir = os.getenv("XDG_STATE_HOME") or os.getenv("XDG_CACHE_HOME") +conf_dir = os.getenv("XDG_CONFIG_HOME") +if not state_dir: + assert home + state_dir = home + "/.cache" + +if not conf_dir: + assert home + conf_dir = home + "/.config" +conf_dir += "/tidal-scraper" +state_dir += "/tidal-scraper" + +with open(conf_dir + "/conf.toml", "rb") as f: + conf = tomllib.load(f) -EXTENSIONS = ['.flac', '.mp4', '.m4a', ''] def clean_template(path: str, **kwargs) -> str: + path = os.path.expanduser(path) split = path.split("/") cleaned_split = [re.sub("/", " ", s.format(**kwargs)) for s in split] return "/".join(cleaned_split) + def log_error(template: str, **kwargs): - with open(CONF['error_log']) as f: - error = template.format(**kwargs) - f.write(error) - traceback.print_exception(*sys.exc_info(), file=f) + with open(conf["error_log"], "a") as f: + msg = template.format(**kwargs) + f.write(msg + "\n\n\n") + traceback.format_exception(*sys.exc_info()) diff --git a/tidal-scraper/metadata.py b/tidal-scraper/metadata.py index 7cbde92..bc18a80 100644 --- a/tidal-scraper/metadata.py +++ b/tidal-scraper/metadata.py @@ -103,7 +103,7 @@ def __write_mp4(file: mp4.MP4, **kwargs) -> None: def write( fp: BinaryIO, - codec: str, + mime: str, title: str, album: str, tracknumber: str, @@ -122,14 +122,13 @@ def write( cover_mime: str | None = None, ) -> None: args = locals() - # TODO: Figure out what codecs are sent in the manifest - # WARN: This match is currently using placeholders - match codec: - case "flac": + fp.seek(0) + match mime: + case "audio/flac": f = flac.FLAC(fp) __write_flac(f, *args) - case "aac": + case "audio/mp4": f = mp4.MP4(fp) __write_mp4(f, *args) case _: - raise Exception(f"Couldn't recognize codec {codec}") + raise Exception(f"Couldn't recognize mimetype {mime}") diff --git a/tidal-scraper/run.py b/tidal-scraper/run.py index 2786004..791e1d7 100644 --- a/tidal-scraper/run.py +++ b/tidal-scraper/run.py @@ -1,14 +1,17 @@ #!/bin/env python from download import download_album from state import State -from helper import CONF - -s = State(CONF['user_id'], CONF['quality']) +from helper import conf +s = State(conf['user_id'], conf['quality']) s.login() - albums = s.favorites.albums() +try: + s.load_dl_state +except: + pass + download_album(albums[0]) s.set_dl_state(albums[0], True) -s.write_state() +s.write_dl_state() diff --git a/tidal-scraper/state.py b/tidal-scraper/state.py index c6e4f1e..ff33dfc 100644 --- a/tidal-scraper/state.py +++ b/tidal-scraper/state.py @@ -1,11 +1,13 @@ import json from datetime import datetime from tidalapi import session, user, playlist, media, album, artist -from helper import CONF +from helper import conf, state_dir class State: - def __init__(self, user_id: int, quality: str): + def __init__( + self, user_id: int, quality: str, dl_state_path: str = state_dir + "/state.json" + ): match quality: case "master": q = session.Quality.master @@ -20,15 +22,18 @@ class State: config = session.Config(quality=q) self.user_id = user_id self.session = session.Session(config) - self.favorites = user.Favorites(self.session, CONF["user_id"]) - self._state = { - "albums": {}, - "artists": {}, - "playlists": {}, - "tracks": {}, - } + self.favorites = user.Favorites(self.session, conf["user_id"]) + try: + self.load_dl_state(dl_state_path) + except: + self._state = { + "albums": {}, + "artists": {}, + "playlists": {}, + "tracks": {}, + } - def login(self, auth_file: str | None = CONF["state_dir"] + "auth.json") -> None: + def login(self, auth_file: str | None = state_dir + "/auth.json") -> None: s = self.session try: assert auth_file @@ -40,7 +45,7 @@ class State: a["refresh_token"], datetime.fromtimestamp(a["expiry_time"]), ) - except (OSError, IndexError, AssertionError): + except (FileNotFoundError, IndexError, AssertionError): s.login_oauth_simple() if ( s.token_type @@ -79,14 +84,19 @@ class State: self._state[t][obj.id] = downloaded - def write_state( - self, state_file_path: str = CONF["state_dir"] + "state.json" - ) -> None: - with open(state_file_path, "w") as f: + def write_dl_state(self, dl_state_path: str | None = None) -> None: + if dl_state_path is None: + dl_state_path = state_dir + "/state.json" + with open(dl_state_path, "w") as f: json.dump(self._state, f) - def load_state( - self, state_file_path: str = CONF["state_dir"] + "state.json" - ) -> None: - with open(state_file_path, "r") as f: + def load_dl_state(self, dl_state_path: str | None = None) -> None: + if dl_state_path is None: + dl_state_path = state_dir + "/state.json" + with open(dl_state_path, "r") as f: self._state = json.load(f) + + assert type(self._state["albums"]) is dict[int, bool] + assert type(self._state["artists"]) is dict[int, bool] + assert type(self._state["playlists"]) is dict[int, bool] + assert type(self._state["tracks"]) is dict[int, bool]