# main.py
import concurrent
import concurrent.futures
import glob
import logging
import re
import shutil
from pathlib import Path

from txtdownloader import TXTDownloader
from ytdownloader import YTDownloader
from coverdownloader import CoverDownloader
from db import *

logging.basicConfig(level=logging.INFO)

# Character class matching every single character that must be stripped from
# artist / song names before they are used to build file names.  It has to be
# a bracketed class: without the brackets the pattern only matches the whole
# run of characters in sequence, which never occurs in real names.
DISALLOWED_CHARS_REGEX = r'[~"#%&*:<>?/\\{|}.]'

class USDBDownloader:
    """Download the assets of every song in the USDB table and build a library.

    Raw downloads are handed to three downloaders rooted at the ``txt``,
    ``audio``/``video`` and ``covers`` sub-directories of ``rawdir``.
    :meth:`build_library` afterwards copies the files that share a ``.txt``
    file's stem into ``libdir/<stem>/``.
    """

    # Size of the thread pool used for song downloads.
    MAX_WORKERS = 24

    # Number of rows downloaded by download_sample().
    SAMPLE_SIZE = 5

    # (raw sub-directory, file suffix) of each asset copied by build_library().
    _ASSET_LAYOUT = (
        ("txt", ".txt"),
        ("audio", ".mp3"),
        ("video", ".mp4"),
        ("covers", ".jpg"),
    )

    def __init__(self, rawdir: str, libdir: str, cache=True):
        """Create the database handle and the three asset downloaders.

        Args:
            rawdir: Directory under which the raw downloads are stored.
            libdir: Directory in which build_library() creates song folders.
            cache: Forwarded unchanged to every downloader.
        """
        self.rawdir = Path(rawdir)
        self.libdir = Path(libdir)
        self.db = USDB("USDB.xlsx")
        # NOTE(review): the first argument looks like a session token that is
        # hard-coded in source -- consider loading it from configuration.
        self.txt = TXTDownloader("nv0rb8ma82p37qrvduvch6j3f6", (self.rawdir / "txt").resolve(), cache)
        self.yt = YTDownloader((self.rawdir / "audio").resolve(), (self.rawdir / "video").resolve(), cache)
        self.cv = CoverDownloader((self.rawdir / "covers").resolve(), cache)

    @staticmethod
    def _is_usable_url(url) -> bool:
        """Return True when *url* is a non-blank string other than "MISSING".

        Non-string values (for example NaN from an empty cell) are rejected.
        """
        return isinstance(url, str) and url.strip() not in ("", "MISSING")

    def _download_rows(self, rows):
        """Run download_song() for every row of *rows* on a thread pool.

        The pool is used as a context manager so its worker threads are
        joined and released before this method returns.
        """
        with concurrent.futures.ThreadPoolExecutor(self.MAX_WORKERS) as executor:
            for _, song in rows.iterrows():
                executor.submit(self.download_song, song)

    def download(self):
        """Download the assets of every song in the database."""
        self._download_rows(self.db.data)

    def download_sample(self):
        """Download the assets of a random sample of songs.

        At most SAMPLE_SIZE rows are drawn; the size is capped at the table
        length so a small table does not make ``sample`` raise.
        """
        data = self.db.data
        self._download_rows(data.sample(min(self.SAMPLE_SIZE, len(data))))

    def download_song(self, song: "pandas.Series"):
        """Download lyrics, cover and video/audio for a single database row.

        Each asset is only requested when its URL is usable (see
        _is_usable_url).  Any exception is logged with its traceback and
        swallowed so that one broken row cannot abort a batch.
        """
        try:
            usdb_url = get_usdb_url(song)
            cover_url = get_cover_image_url(song)
            yt_url = get_yt_video_url(song)
            # Strip characters that are not allowed in the resulting file names.
            song_name = re.sub(DISALLOWED_CHARS_REGEX, "", get_song_name(song))
            artist = re.sub(DISALLOWED_CHARS_REGEX, "", get_artist_name(song))
            spotify_uri = get_track_id(song)
            gap = get_gap(song)
            video_gap = get_video_gap(song)
            start = get_start(song)
            end = get_end(song)
            language = get_language(song)
            year = str(get_date(song).year)

            logging.info(
                "Downloading: %s - %s: %s, %s, %s",
                artist, song_name, usdb_url, cover_url, type(yt_url),
            )

            if self._is_usable_url(usdb_url):
                self.txt.download(usdb_url, artist, song_name, spotify_uri, gap, video_gap, start, end, language, year)

            if self._is_usable_url(cover_url):
                self.cv.download(cover_url, artist, song_name, spotify_uri)

            if self._is_usable_url(yt_url):
                self.yt.download(yt_url, artist, song_name, spotify_uri)

        except Exception as e:
            # Best effort: keep the batch running, but keep the traceback too.
            logging.exception("Song download failed: %s", e)

    def build_library(self):
        """Copy each song's raw files into its own folder below ``libdir``.

        Every ``*.txt`` file in ``rawdir/txt`` defines one song; the folder is
        named after the file's stem and receives whichever of the ``.txt``,
        ``.mp3``, ``.mp4`` and ``.jpg`` files with that stem exist.
        """
        for txt_file in glob.glob(str(self.rawdir / "txt" / "*.txt")):
            stem = Path(txt_file).stem
            outdir = self.libdir / stem
            outdir.mkdir(parents=True, exist_ok=True)

            for subdir, suffix in self._ASSET_LAYOUT:
                asset = self.rawdir / subdir / f"{stem}{suffix}"
                if asset.is_file():
                    shutil.copy2(asset, outdir)


if __name__ == "__main__":
    # Script entry point: download every song into ./out, then assemble the
    # per-song folders in ./lib from the downloaded files.
    downloader = USDBDownloader(rawdir="./out", libdir="./lib", cache=True)
    downloader.download()
    downloader.build_library()