"""Parser for UltraStar song text files (header tags plus note lines)."""

import codecs
import logging
import re
import os.path

from UltraStarSongFile import UltraStarSongFile


class UltraStarSongFileParser:
    """Read an UltraStar song file and build an ``UltraStarSongFile``.

    Header lines (``#IDENTIFIER:value``) are collected into a dict keyed by
    tag name; every note/content line is collected verbatim into a list.
    With ``strict_mode`` enabled, unknown or duplicate header identifiers
    raise ``ValueError`` instead of only being logged.
    """

    # Known header identifiers (upper case). The value is the tag name to
    # store the header under; ``None`` means "use the lower-cased identifier".
    IDENTIFIER_KEYS = {
        "TITLE": None,
        "ARTIST": None,
        "MP3": None,
        "BPM": None,
        "GAP": None,
        "COVER": None,
        "BACKGROUND": None,
        "VIDEO": None,
        "VIDEOGAP": None,
        "GENRE": None,
        "EDITION": None,
        "CREATOR": None,
        "LANGUAGE": None,
        "YEAR": None,
        "START": None,
        "END": None,
        "RESOLUTION": None,
        "NOTESGAP": None,
        "RELATIVE": None,
        "ENCODING": None,
        "PREVIEWSTART": None,
        "MEDLEYSTARTBEAT": None,
        "MEDLEYENDBEAT": None,
        "CALCMEDLEY": None,
        "DUETSINGERP1": None,
        "DUETSINGERP2": None,
        "P1": None,
        "P2": None,
        "AUTHOR": None,
    }

    HEADER_PATTERN = re.compile(r"#([A-Za-z0-9]+):(.*)")
    # Case-insensitive so that "#encoding:" is found just like any other
    # header, whose identifier is matched case-insensitively as well.
    ENCODING_PATTERN = re.compile(r"#ENCODING:(.*)", re.IGNORECASE)

    # First characters that mark a line as song content.
    CONTENT_PREFIXES = (":", "-", "*", "F", "P", "B")

    def __init__(self, strict_mode=False):
        """Create a parser.

        :param strict_mode: if true, unknown and duplicate header
            identifiers raise ``ValueError`` instead of being logged.
        """
        self.strict_mode = strict_mode

    def parse_file(self, file, encoding=None):
        """Parse ``file`` and return the populated ``UltraStarSongFile``.

        :param file: path of the song file (a ``str``).
        :param encoding: text encoding to read the file with. When falsy,
            the ``#ENCODING`` header of the file is used, and ``latin1`` if
            the file declares none (or an unknown one).
        :raises ParseIgnore: the file name looks like a readme/license.
        :raises ParseErrorFileBroken: the file is empty or all null bytes.
        :raises FileNotFoundError: ``file`` is not an existing regular file.
        :raises ValueError: a line cannot be parsed, or strict mode rejects
            a header.
        """
        song = {}
        content = []

        self._check_file(file)
        if not os.path.isfile(file):
            raise FileNotFoundError(f"File {file} not found.")

        if not encoding:
            logging.debug("=> Searching for Encoding")
            encoding = self._find_encoding(file)
        if not encoding:
            logging.warning("No encoding specified and none found in file. "
                            "Fallback to latin1.")
            encoding = "latin1"

        with open(file, mode="r", encoding=encoding) as f:
            logging.debug("=> Reading file %s", file)
            for linenum, line in enumerate(f, 1):
                logging.log(5, "=> Reading line: %s", line)
                # Lines read from a file keep their newline, so an "empty"
                # line is "\n", never "". Test the stripped text instead.
                if not line.strip():
                    continue
                if line[0] == "#":
                    logging.debug("=> Parsing header line: %s", line)
                    self._parse_header(line.strip(), song, linenum)
                elif line[0] in self.CONTENT_PREFIXES:
                    self._parse_content(line, content, linenum)
                elif line.rstrip() == "E":
                    # End marker; tolerate trailing whitespace after it.
                    logging.debug("=> Parsed content end marker")
                    break
                else:
                    raise ValueError(f"Line {linenum} unparsable, prefix "
                                     f"unknown: {line!r}")

        song["path"] = file
        song["songdata"] = content

        file_obj = UltraStarSongFile()
        file_obj.set_attributes(parsed_data=song)
        # Use .get(): a file without TITLE/ARTIST headers must not make this
        # purely informational log call raise KeyError.
        logging.debug("Parsed song %s by %s, from \"%s\"",
                      song.get("title"), song.get("artist"), file)
        return file_obj

    def _find_encoding(self, file):
        """Return the encoding declared by the file's ``#ENCODING`` header.

        The file is scanned as ``iso-8859-1`` (decode errors ignored) up to
        the first ``#ENCODING:`` line. Returns the lower-cased encoding
        name, or ``None`` when there is no such header, its value is empty
        or ``auto``, or ``codecs.lookup`` does not know the encoding.
        """
        encoding = None
        with open(file, mode="r", errors="ignore",
                  encoding="iso-8859-1") as f:
            for line in f:
                # Strip first: the trailing newline would otherwise make
                # fullmatch() fail, because "." does not match "\n".
                match = self.ENCODING_PATTERN.fullmatch(line.strip())
                if not match:
                    continue
                declared = match.group(1).strip().lower()
                logging.debug("Found encoding identifier in file: %s",
                              declared)
                if declared and declared != "auto":
                    encoding = declared
                break

        if encoding:
            try:
                # invalid encoding will raise LookupError
                codecs.lookup(encoding)
            except LookupError:
                logging.warning(
                    "Encoding %s is not known by python. Using fallback.",
                    encoding)
                encoding = None
        return encoding

    def _check_file(self, filename):
        """Reject files that should not be parsed at all.

        :raises ParseIgnore: ``filename`` contains "license" or "readme"
            (case-insensitive).
        :raises ParseErrorFileBroken: the file is empty or consists only of
            null bytes.
        """
        lowered = filename.lower()
        if "license" in lowered or "readme" in lowered:
            raise ParseIgnore("Filename sounds like a readme or license "
                              "file, skipping")
        with open(filename, mode="rb") as f:
            filebytes = f.read()
        # Stripping null bytes leaves nothing for both an empty file and a
        # file made up solely of null bytes.
        if not filebytes.strip(b"\x00"):
            raise ParseErrorFileBroken("This file is empty, "
                                       "or only contains null bytes.")

    def _parse_header(self, line, song, linenum):
        """Parse one stripped ``#IDENTIFIER:value`` line into ``song``.

        Known identifiers are stored under their lower-cased name; unknown
        ones are stored under the identifier exactly as written. Any of the
        duet identifiers also sets ``song["duet"] = True``, and the legacy
        ``duetsingerp1``/``duetsingerp2`` tags are stored as ``p1``/``p2``.
        A tag that is already present is not overwritten.

        :raises ValueError: the line does not match ``HEADER_PATTERN``, or
            strict mode is set and the identifier is unknown or duplicate.
        """
        match = self.HEADER_PATTERN.fullmatch(line)
        if not match:
            raise ValueError(f"Line {linenum}: Could not parse line: {line}")
        identifier, value = match.group(1, 2)

        key = identifier.upper()
        if key not in self.IDENTIFIER_KEYS:
            if self.strict_mode:
                raise ValueError(f"Line {linenum}: Identifier {identifier} is "
                                 f"unknown and strict mode is set.")
            logging.warning(
                "Line %3i: Identifier %s is not known, adding as custom tag.",
                linenum, identifier)
            tag = identifier
        else:
            tag = self.IDENTIFIER_KEYS[key] or identifier.lower()

        if tag in ("p1", "p2", "duetsingerp1", "duetsingerp2"):
            song["duet"] = True
        # convert legacy tags
        if tag == "duetsingerp1":
            tag = "p1"
        if tag == "duetsingerp2":
            tag = "p2"

        if tag in song:
            if self.strict_mode:
                raise ValueError(
                    f"Line {linenum} Identifier {identifier} is duplicate.")
            logging.error("Line %3i: Identifier %s is duplicate, ignoring.",
                          linenum, identifier)
        else:
            logging.debug("Line %3i: Parsed tag %s with value %s",
                          linenum, tag, value)
            song[tag] = value

    def _parse_content(self, line, content, linenum):
        """Append the raw content ``line`` to ``content``.

        ``linenum`` is accepted for symmetry with ``_parse_header`` and is
        not used.
        """
        content.append(line)


class ParseIgnore(Exception):
    """Parse error which indicates that the file should be ignored"""


class ParseErrorFileBroken(Exception):
    """Parse error which indicates the file is broken beyond repair"""