Files
Markov/src/rulesparser.py
2025-03-16 01:41:59 +03:00

88 lines
2.8 KiB
Python

from rule import Rule, EMPTY_SYMBOL
import re
class RulesParser:
    """
    Parser for text files containing rewriting rules.

    After comments are removed, every non-blank line is expected to
    consist of exactly three whitespace-separated parts:
    operand, arrow, target. The arrow is matched against TRANSFORM
    (rule with is_blocking=False) or B_TRANSFORM (is_blocking=True).
    The syntax is described by the class variables below; all of them
    except NEWLINE are regular expressions.
    """

    # arrow of a rule that is created with is_blocking=False
    TRANSFORM = r'->'
    # arrow of a rule that is created with is_blocking=True
    B_TRANSFORM = r'->\|'
    # separator used to cut the text into lines (plain string, not a regex)
    NEWLINE = '\n'
    # separator between the parts of a rule
    IGNORE = r'\s+'
    # comment runs from `//` to the end of the line; `.*` (not `.+`)
    # so that a bare `//` with nothing after it is removed as well
    COMMENTS = r'//.*$'
    # regex matching one literal EMPTY_SYMBOL
    EMPTY = re.escape(EMPTY_SYMBOL)

    def parse(self, file: str) -> list[Rule]:
        """
        Parsing file according to syntax, specified
        in class variables. Hardcoded for now (and forever)

        file -- path to the file with rules; it is read as UTF-8 so
        the result does not depend on the platform default encoding

        Returns rules in the order they appear in the file.
        Raises ValueError if some line is not a valid rule.
        """
        with open(file, 'r', encoding='utf-8') as f:
            text = f.read()
        return [self._parse_rule(line) for line in self._get_lines(text)]

    def _parse_rule(self, line: str) -> Rule:
        """
        tries to parse rule according to set grammar

        line -- single cleaned line, see _get_lines

        Returns Rule built from operand, target and arrow kind.
        Raises ValueError if the line does not consist of exactly
        three parts or if the arrow is not recognized.
        """
        # re.split yields empty strings when the line starts or ends
        # with a separator; they are not parts of the rule
        tokens = [token for token in re.split(self.IGNORE, line) if token]
        # we always expect 3 parts: operand, arrow, target
        if len(tokens) != 3:
            raise ValueError(f"Can't parse rule \"{line}\": "
                             f"3 parts expected (operand, arrow, target), "
                             f"but {len(tokens)} encountered")
        operand, arrow, target = tokens
        if re.fullmatch(self.TRANSFORM, arrow):
            is_blocking = False
        elif re.fullmatch(self.B_TRANSFORM, arrow):
            is_blocking = True
        else:
            raise ValueError(f"Can't recognize transform symbol. "
                             f"\"{self.TRANSFORM}\" or \"{self.B_TRANSFORM}\""
                             f" expected, but \"{arrow}\" encountered")
        # optimising empty symbol
        return Rule(
            operand=self._optimise_empty(operand),
            target=self._optimise_empty(target),
            is_blocking=is_blocking
        )

    def _optimise_empty(self, string: str) -> str:
        """
        Empty symbol has meaning only while it's the only
        symbol in the string, so all other empty symbols
        can be optimised

        Returns single EMPTY_SYMBOL if the string consists only of
        empty symbols, otherwise returns the string with every
        empty symbol removed (unchanged if there were none)
        """
        # group the escaped symbol so that `+` repeats the whole symbol
        # and not just its last character
        if re.fullmatch(f'(?:{self.EMPTY})+', string):
            return EMPTY_SYMBOL
        return re.sub(self.EMPTY, '', string)

    def _get_lines(self, src: str) -> list[str]:
        """
        Get cleaned lines only with rules to parse

        Comments and trailing whitespace are removed, then blank
        lines are dropped
        """
        text = self._remove_comments(src)
        text = self._strip_lines(text)
        return [line for line in text.split(self.NEWLINE) if line.strip()]

    def _remove_comments(self, src: str) -> str:
        """
        removes comments from end of lines and returns
        cleaned text

        Everything from the first match of COMMENTS to the end
        of the line is dropped; line breaks are kept
        """
        return re.sub(self.COMMENTS, '', src, flags=re.M)

    def _strip_lines(self, src: str) -> str:
        """
        Strips whitespaces at the end of lines

        Any whitespace except the line break itself is removed
        (spaces, tabs, stray carriage returns)
        """
        # [^\S\n] is "whitespace but not newline", so line breaks
        # (and therefore the number of lines) are preserved
        return re.sub(r'[^\S\n]+$', '', src, flags=re.M)