Files
Markov/src/rulesparser.py

70 lines
2.1 KiB
Python

from rule import Rule, EMPTY_SYMBOL
import re
class RulesParser:
    """
    Parser for text files that describe rewriting rules.

    Every non-blank line holds exactly one rule written as three
    whitespace-separated parts: ``operand arrow target``. The arrow is
    ``->`` for an ordinary rule and ``->|`` for a blocking one.
    Everything from ``//`` up to the end of a line is a comment.
    """

    # Syntax elements. All of them except NEWLINE are regular expressions.
    TRANSFORM = r'->'
    B_TRANSFORM = r'->\|'
    NEWLINE = '\n'
    IGNORE = r'\s+'
    # ``.*`` rather than ``.+`` so that a bare ``//`` with no text after it
    # is removed as well instead of being parsed as a rule.
    COMMENTS = r'//.*$'
    # Regex-escaped EMPTY_SYMBOL from the rule module; not referenced
    # anywhere inside this class.
    EMPTY = re.escape(EMPTY_SYMBOL)

    def parse(self, file: str) -> list[Rule]:
        """
        Read the file at path `file` and parse it according to the syntax
        specified in the class variables.

        Returns the rules in the order they appear in the file.
        Raises ValueError when a line is not a well-formed rule; errors
        from opening or reading the file propagate unchanged.
        """
        with open(file, 'r') as f:
            text = f.read()
        return [self._parse_rule(line) for line in self._get_lines(text)]

    def _parse_rule(self, line: str) -> Rule:
        """
        Parse a single cleaned line into a Rule.

        Raises ValueError when the line does not consist of exactly three
        parts or when the middle part is not a known arrow.
        """
        # Dropping empty tokens makes leading/trailing whitespace harmless:
        # re.split would otherwise yield '' at either end and shift the
        # operand/arrow/target positions.
        tokens = [tok for tok in re.split(self.IGNORE, line) if tok]
        # We always expect 3 parts: operand, arrow, target. Checking up
        # front gives a clear ValueError instead of an IndexError on short
        # lines and stops extra tokens from being silently discarded.
        if len(tokens) != 3:
            raise ValueError(
                f"Can't parse rule {line!r}: exactly 3 parts "
                f"(operand, arrow, target) expected, "
                f"but {len(tokens)} found"
            )
        operand, arrow, target = tokens

        if re.fullmatch(self.TRANSFORM, arrow):
            is_blocking = False
        elif re.fullmatch(self.B_TRANSFORM, arrow):
            is_blocking = True
        else:
            raise ValueError(f"Can't recognize transform symbol. "
                             f"\"{self.TRANSFORM}\" or \"{self.B_TRANSFORM}\""
                             f" expected, but \"{arrow}\" encountered")

        return Rule(
            operand=operand,
            target=target,
            is_blocking=is_blocking
        )

    def _get_lines(self, src: str) -> list[str]:
        """
        Return only the lines of `src` that contain rules: comments are
        removed, surrounding whitespace is stripped and blank lines are
        dropped.
        """
        text = self._strip_lines(self._remove_comments(src))
        # strip() also removes leading whitespace and tabs, so indented
        # rules and whitespace-only lines are handled uniformly.
        stripped = (line.strip() for line in text.split(self.NEWLINE))
        return [line for line in stripped if line]

    def _remove_comments(self, src: str) -> str:
        """
        Remove comments from the end of lines and return the cleaned text.
        Line breaks are left in place.
        """
        return re.sub(self.COMMENTS, '', src, flags=re.M)

    def _strip_lines(self, src: str) -> str:
        """
        Strip whitespace at the end of every line, keeping the line breaks.
        """
        # [^\S\n] is "any whitespace except newline": tabs and other
        # blanks are removed too, but lines are never merged.
        return re.sub(r'[^\S\n]+$', '', src, flags=re.M)