# UltraStarSongFileParser.py
import codecs
import logging
import re
import os.path

from UltraStarSongFile import UltraStarSongFile


class UltraStarSongFileParser:
    """Parse UltraStar song text files into ``UltraStarSongFile`` objects.

    The parser reads ``#KEY:value`` header lines into a dict, collects note
    lines verbatim, stops at the ``E`` end marker and hands the result to
    ``UltraStarSongFile.set_attributes``.

    With ``strict_mode`` enabled, unknown and duplicate header identifiers
    raise ``ValueError`` instead of only being logged.
    """

    # Known header identifiers. A truthy value overrides the tag name stored
    # in the parsed song; ``None`` means the lower-cased identifier is used.
    IDENTIFIER_KEYS = {
        "TITLE": None,
        "ARTIST": None,
        "MP3": None,
        "BPM": None,
        "GAP": None,
        "COVER": None,
        "BACKGROUND": None,
        "VIDEO": None,
        "VIDEOGAP": None,
        "GENRE": None,
        "EDITION": None,
        "CREATOR": None,
        "LANGUAGE": None,
        "YEAR": None,
        "START": None,
        "END": None,
        "RESOLUTION": None,
        "NOTESGAP": None,
        "RELATIVE": None,
        "ENCODING": None,
        "PREVIEWSTART": None,
        "MEDLEYSTARTBEAT": None,
        "MEDLEYENDBEAT": None,
        "CALCMEDLEY": None,
        "DUETSINGERP1": None,
        "DUETSINGERP2": None,
        "P1": None,
        "P2": None,
        "AUTHOR": None,
    }
    HEADER_PATTERN = re.compile(r"#([A-Za-z0-9]+):(.*)")

    # First characters that mark a line as song content (passed on verbatim).
    NOTE_PREFIXES = (":", "-", "*", "F", "P", "B")
    _ENCODING_PATTERN = re.compile(r"#ENCODING:(.*)")

    def __init__(self, strict_mode=False):
        """Create a parser.

        Args:
            strict_mode: When true, unknown or duplicate header identifiers
                raise ``ValueError`` instead of being logged.
        """
        self.strict_mode = strict_mode

    def parse_file(self, file, encoding=None):
        """Parse the song file at path ``file``.

        Args:
            file: Path of the song file.
            encoding: Text encoding used to decode the file. When falsy, the
                file's ``#ENCODING`` header is used, falling back to latin1.

        Returns:
            An ``UltraStarSongFile`` on which ``set_attributes`` was called
            with the parsed headers plus the ``path`` and ``songdata``
            entries.

        Raises:
            FileNotFoundError: ``file`` is not an existing regular file.
            ParseIgnore: The path looks like a readme or license file.
            ParseErrorFileBroken: The file is empty or only null bytes.
            ValueError: A line has an unknown prefix, a header line is
                malformed, or (strict mode) a header is unknown/duplicate.
        """
        # Check existence first so a missing path or a directory is reported
        # with this message instead of whatever open() would raise.
        if not os.path.isfile(file):
            raise FileNotFoundError(f"File {file} not found.")

        self._check_file(file)

        if not encoding:
            logging.debug("=> Searching for Encoding")
            encoding = self._find_encoding(file)
        if not encoding:
            logging.warning("No encoding specified and none found in file. "
                            "Fallback to latin1.")
            encoding = "latin1"

        song = {}
        content = []
        with open(file, mode="r", encoding=encoding) as f:
            logging.debug("=> Reading file %s", file)
            for linenum, line in enumerate(f, 1):
                logging.log(5, "=> Reading line: %s", line)
                if linenum == 1:
                    # A decoded byte order mark would hide the leading "#".
                    line = line.lstrip("\ufeff")
                if not line.strip():
                    # Lines read from a file keep their newline, so blank
                    # lines can only be detected after stripping.
                    continue
                if line[0] == "#":
                    logging.debug("=> Parsing header line: %s", line)
                    self._parse_header(line.strip(), song, linenum)
                elif line[0] in self.NOTE_PREFIXES:
                    self._parse_content(line, content, linenum)
                elif line.rstrip() == "E":
                    logging.debug("=> Parsed content end marker")
                    break
                else:
                    raise ValueError(f"Line {linenum} unparsable, prefix "
                                     f"unknown: {repr(line)}")

        song["path"] = file
        song["songdata"] = content
        file_obj = UltraStarSongFile()
        file_obj.set_attributes(parsed_data=song)
        # .get(): a file without #TITLE/#ARTIST must not fail in a log call.
        logging.debug("Parsed song %s by %s, from \"%s\"", song.get("title"),
                      song.get("artist"), file)
        return file_obj

    def _find_encoding(self, file):
        """Return the codec named by the file's ``#ENCODING`` header.

        Args:
            file: Path of the song file to scan.

        Returns:
            The lower-cased encoding name, or ``None`` when there is no
            ``#ENCODING`` header, its value is empty or ``auto``, or Python
            does not know the codec.
        """
        encoding = None
        # iso-8859-1 maps every byte to a character, so the scan itself
        # cannot fail regardless of the file's real encoding.
        with open(file, mode="r", errors="ignore", encoding="iso-8859-1") as f:
            for linenum, line in enumerate(f, 1):
                if linenum == 1:
                    # A UTF-8 byte order mark read as iso-8859-1.
                    line = line.lstrip("\xef\xbb\xbf")
                # Strip the newline: "." does not match it, so fullmatch
                # would otherwise reject every line but an unterminated one.
                match = self._ENCODING_PATTERN.fullmatch(line.strip())
                if match:
                    encoding = match.group(1).strip().lower()
                    logging.debug("Found encoding identifier in file: %s",
                                  encoding)
                    if not encoding or encoding == "auto":
                        encoding = None
                    break
        if encoding:
            try:
                # invalid encoding will raise LookupError
                codecs.lookup(encoding)
            except LookupError:
                logging.warning(
                    "Encoding %s is not known by python. Using fallback.",
                    encoding)
                encoding = None
        return encoding

    def _check_file(self, filename):
        """Reject files that are obviously not parsable song files.

        Args:
            filename: Path of the file to check.

        Raises:
            ParseIgnore: The path contains "license" or "readme"
                (case-insensitive).
            ParseErrorFileBroken: The file is empty or consists only of
                null bytes.
        """
        # NOTE: the whole path is inspected, not only the base name.
        lowered = os.fspath(filename).lower()
        if "license" in lowered or "readme" in lowered:
            raise ParseIgnore("Filename sounds like a readme or license "
                              "file, skipping")

        with open(filename, mode="rb") as f:
            filebytes = f.read()
        # Stripping null bytes leaves nothing for empty and all-null files.
        if not filebytes.strip(b"\x00"):
            raise ParseErrorFileBroken("This file is empty, "
                                       "or only contains null bytes.")

    def _parse_header(self, line, song, linenum):
        """Parse one ``#KEY:value`` header line into ``song``.

        Known identifiers are stored under their lower-cased name (legacy
        ``DUETSINGERP1``/``DUETSINGERP2`` become ``p1``/``p2``); unknown
        identifiers are stored under their original spelling. Any duet tag
        also sets ``song["duet"] = True``. The first occurrence of a tag
        wins.

        Args:
            line: Header line without surrounding whitespace.
            song: Dict that receives the parsed tag.
            linenum: 1-based line number, used in messages only.

        Raises:
            ValueError: The line does not match ``HEADER_PATTERN``, or, in
                strict mode, the identifier is unknown or duplicate.
        """
        match = self.HEADER_PATTERN.fullmatch(line)
        if not match:
            raise ValueError(f"Line {linenum}: Could not parse line: {line}")
        identifier, value = match.group(1, 2)

        key = identifier.upper()
        if key in self.IDENTIFIER_KEYS:
            tag = self.IDENTIFIER_KEYS[key] or identifier.lower()
        else:
            if self.strict_mode:
                raise ValueError(f"Line {linenum}: Identifier {identifier} is "
                                 f"unknown and strict mode is set.")
            logging.warning(
                "Line %3i: Identifier %s is not known, adding as custom tag.",
                linenum, identifier)
            tag = identifier

        if tag in ("p1", "p2", "duetsingerp1", "duetsingerp2"):
            song["duet"] = True

        # convert legacy tags
        if tag == "duetsingerp1":
            tag = "p1"
        elif tag == "duetsingerp2":
            tag = "p2"

        if tag in song:
            if self.strict_mode:
                raise ValueError(
                    f"Line {linenum} Identifier {identifier} is duplicate.")
            logging.error("Line %3i: Identifier %s is duplicate, ignoring.",
                          linenum, identifier)
        else:
            logging.debug("Line %3i: Parsed tag %s with value %s", linenum,
                          tag, value)
            song[tag] = value

    def _parse_content(self, line, content, linenum):
        """Append the raw content ``line`` to ``content``.

        ``linenum`` is accepted for symmetry with ``_parse_header`` and is
        currently unused.
        """
        content.append(line)


class ParseIgnore(Exception):
    """Signal that a file should be skipped instead of being parsed."""


class ParseErrorFileBroken(Exception):
    """Signal that a file is damaged beyond repair and cannot be parsed."""