Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import codecs
import logging
import re
import os.path
from UltraStarSongFile import UltraStarSongFile
class UltraStarSongFileParser:
IDENTIFIER_KEYS = {
"TITLE": None,
"ARTIST": None,
"MP3": None,
"BPM": None,
"GAP": None,
"COVER": None,
"BACKGROUND": None,
"VIDEO": None,
"VIDEOGAP": None,
"GENRE": None,
"EDITION": None,
"CREATOR": None,
"LANGUAGE": None,
"YEAR": None,
"START": None,
"END": None,
"RESOLUTION": None,
"NOTESGAP": None,
"RELATIVE": None,
"ENCODING": None,
"PREVIEWSTART": None,
"MEDLEYSTARTBEAT": None,
"MEDLEYENDBEAT": None,
"CALCMEDLEY": None,
"DUETSINGERP1": None,
"DUETSINGERP2": None,
"P1": None,
"P2": None,
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
}
# Shape of a header line: "#", an alphanumeric identifier, ":", then the
# value (possibly empty). Group 1 is the identifier, group 2 the value.
HEADER_PATTERN = re.compile(r"#([A-Za-z0-9]+):(.*)")
def __init__(self, strict_mode=False):
    """Create a parser and remember the requested strictness.

    :param strict_mode: stored as ``self.strict_mode``; header parsing
        reads it to decide whether unknown or duplicate identifiers raise
        ``ValueError`` or are only logged.
    """
    self.strict_mode = strict_mode
def parse_file(self, file, encoding=None):
song = {}
content = []
self._check_file(file)
if not os.path.isfile(file):
raise FileNotFoundError(f"File {file} not found.")
if not encoding:
encoding = self._find_encoding(file)
if not encoding:
logging.warning("No encoding specified and none found in file. "
"Fallback to latin1.")
encoding = "latin1"
with open(file, mode="r", encoding=encoding) as f:
logging.debug("=> Searching for Encoding")
logging.debug(f"=> Reading file {file}")
for linenum, line in enumerate(f, 1):
logging.log(5, f"=> Reading line: {line}")
if not line:
continue
elif line[0] == "#":
logging.debug(f"=> Parsing header line: {line}")
self._parse_header(line.strip(), song, linenum)
elif line[0] in (":", "-", "*", "F", "P", "B"):
self._parse_content(line, content, linenum)
logging.debug("=> Parsed content end marker")
break
else:
raise ValueError(f"Line {linenum} unparsable, prefix "
song["path"] = file
song["songdata"] = content
file_obj = UltraStarSongFile()
file_obj.set_attributes(parsed_data=song)
logging.debug("Parsed song %s by %s, from \"%s\"", song["title"],
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
song["artist"], file)
return file_obj
def _find_encoding(self, file):
    """Scan ``file`` for an ``#ENCODING:`` header and return its value.

    The file is read as iso-8859-1 so that the scan can be done before
    the real encoding is known.

    :param file: path of the song file to scan.
    :return: the lower-cased codec name from the first ``#ENCODING:``
        line, or ``None`` if there is no such line, its value is empty
        or ``auto``, or Python does not know a codec of that name.
    """
    # Identifier matching is case-insensitive, like header parsing.
    pattern = re.compile(r"#ENCODING:(.*)", re.IGNORECASE)
    encoding = None
    with open(file, mode="r", errors="ignore", encoding="iso-8859-1") as f:
        for line in f:
            # Lines yielded by the file still end in a newline, which "."
            # never matches; without stripping it fullmatch() would only
            # succeed on a final line that has no line ending.
            match = pattern.fullmatch(line.rstrip("\r\n"))
            if match:
                encoding = match.group(1).strip().lower()
                logging.debug("Found encoding identifier in file: %s",
                              encoding)
                if encoding == "auto":
                    encoding = None
                # Only the first encoding header is considered.
                break
    if not encoding:
        return None
    try:
        # invalid encoding will raise LookupError
        codecs.lookup(encoding)
    except LookupError:
        logging.warning(
            "Encoding %s is not known by python. Using fallback.",
            encoding)
        return None
    return encoding
def _check_file(self, filename):
    """Reject files that should not be parsed as songs.

    :param filename: path of the candidate file.
    :raises ParseIgnore: if the file's own name contains "license" or
        "readme" (case-insensitive).
    :raises ParseErrorFileBroken: if the file is empty or consists only
        of null bytes.
    """
    # Inspect only the last path component: matching against the whole
    # path would skip every song stored below a directory whose name
    # happens to contain "readme" or "license".
    basename = os.path.basename(filename).lower()
    if "license" in basename or "readme" in basename:
        raise ParseIgnore("Filename sounds like a readme or license "
                          "file, skipping")
    with open(filename, mode="rb") as f:
        filebytes = f.read()
    # Stripping null bytes leaves nothing for both an empty file and a
    # file made up solely of null bytes.
    if not filebytes.strip(b"\x00"):
        raise ParseErrorFileBroken("This file is empty, "
                                   "or only contains null bytes.")
def _parse_header(self, line, song, linenum):
    """Parse one ``#IDENTIFIER:value`` line and record it in ``song``.

    Known identifiers are stored under their lower-cased name; unknown
    ones are stored as written (custom tags). Any player tag also sets
    ``song["duet"]``, and the legacy ``duetsingerp1``/``duetsingerp2``
    tags are stored as ``p1``/``p2``. A tag already present in ``song``
    is left untouched.

    :param line: the header line, expected to match ``HEADER_PATTERN``
        in full.
    :param song: dict that collects the parsed tags; modified in place.
    :param linenum: line number, used only in messages.
    :raises ValueError: if the line does not match the header pattern,
        or, in strict mode, if the identifier is unknown or duplicate.
    """
    parsed = re.fullmatch(self.HEADER_PATTERN, line)
    if parsed is None:
        raise ValueError(f"Line {linenum}: Could not parse line: {line}")
    identifier = parsed.group(1)
    value = parsed.group(2)
    lookup_key = identifier.upper()

    if lookup_key in self.IDENTIFIER_KEYS:
        # An empty mapping value means "use the lower-cased identifier".
        tag = self.IDENTIFIER_KEYS[lookup_key] or identifier.lower()
    elif self.strict_mode:
        raise ValueError(f"Line {linenum}: Identifier {identifier} is "
                         f"unknown and strict mode is set.")
    else:
        logging.warning(
            "Line %3i: Identifier %s is not known, adding as custom tag.",
            linenum, identifier)
        tag = identifier

    if tag in ("p1", "p2", "duetsingerp1", "duetsingerp2"):
        song["duet"] = True
    # convert legacy tags
    legacy_names = {"duetsingerp1": "p1", "duetsingerp2": "p2"}
    tag = legacy_names.get(tag, tag)

    if tag not in song:
        logging.debug("Line %3i: Parsed tag %s with value %s", linenum,
                      tag, value)
        song[tag] = value
        return

    if self.strict_mode:
        raise ValueError(
            f"Line {linenum} Identifier {identifier} is duplicate.")
    logging.error("Line %3i: Identifier %s is duplicate, ignoring.",
                  linenum, identifier)
def _parse_content(self, line, content, linenum):
    """Add one song-body line, unchanged, to the end of ``content``.

    :param line: the raw line to store.
    :param content: list collecting the body lines; modified in place.
    :param linenum: line number of ``line``; not used here.
    """
    content.extend([line])
class ParseIgnore(Exception):
    """Raised to signal that a file should be ignored rather than parsed."""
class ParseErrorFileBroken(Exception):
    """Raised to signal that a file is broken beyond repair."""