import re

from rule import Rule, EMPTY_SYMBOL


class RulesParser:
    """Parse a rules file into a list of ``Rule`` objects.

    Every non-blank line of the file describes one rule and consists of
    whitespace-separated parts: ``operand``, an arrow and ``target``.
    The arrow is either ``->`` (``is_blocking=False``) or ``->|``
    (``is_blocking=True``). Everything from ``//`` to the end of a line is
    treated as a comment and dropped before parsing.
    """

    # Grammar pieces; all of them except NEWLINE are regular expressions.
    TRANSFORM = r'->'
    B_TRANSFORM = r'->\|'
    NEWLINE = '\n'
    IGNORE = r'\s+'
    # ``.*`` (not ``.+``) so that a bare ``//`` with nothing after it is
    # still recognised as a comment.
    COMMENTS = r'//.*$'
    EMPTY = re.escape(EMPTY_SYMBOL)

    def parse(self, file: str) -> list[Rule]:
        """
        Read ``file`` and parse it according to the syntax specified in the
        class variables.

        :param file: path of the rules file to read
        :return: one ``Rule`` per non-blank, non-comment line, in file order
        :raises ValueError: if a line is not a well-formed rule
        """
        with open(file, 'r') as f:
            text = f.read()
        return [self._parse_rule(line) for line in self._get_lines(text)]

    def _parse_rule(self, line: str) -> Rule:
        """
        Parse a single cleaned line into a ``Rule``.

        :param line: one rule line without comments
        :return: the rule built from the operand, arrow and target tokens
        :raises ValueError: if the line has fewer than three parts or the
            second part is not a known arrow
        """
        # Drop empty tokens so leading/trailing whitespace around the rule
        # does not shift the positions of operand, arrow and target.
        tokens = [tok for tok in re.split(self.IGNORE, line) if tok]

        # We always expect 3 parts: operand, arrow, target. Anything after
        # the target is ignored, as it always has been.
        if len(tokens) < 3:
            raise ValueError(f"Can't parse rule \"{line}\": "
                             f"operand, transform symbol and target expected")
        operand, arrow, target = tokens[:3]

        # Only a full match counts, so e.g. "->|" is never taken for "->".
        if re.fullmatch(self.TRANSFORM, arrow):
            is_blocking = False
        elif re.fullmatch(self.B_TRANSFORM, arrow):
            is_blocking = True
        else:
            raise ValueError(f"Can't recognize transform symbol. "
                             f"\"{self.TRANSFORM}\" or \"{self.B_TRANSFORM}\""
                             f" expected, but \"{arrow}\" encountered")

        return Rule(
            operand=operand,
            target=target,
            is_blocking=is_blocking
        )

    def _get_lines(self, src: str) -> list[str]:
        """
        Get cleaned lines only with rules to parse.

        Comments and trailing whitespace are removed first; lines that are
        empty or whitespace-only afterwards are skipped.
        """
        text = self._strip_lines(self._remove_comments(src))
        return [line for line in text.split(self.NEWLINE) if line.strip()]

    def _remove_comments(self, src: str) -> str:
        """
        Remove comments from the end of lines and return the cleaned text.
        """
        # re.M makes ``$`` match at every line end, not only at end of text.
        return re.sub(self.COMMENTS, '', src, flags=re.M)

    def _strip_lines(self, src: str) -> str:
        """
        Strip spaces and tabs at the end of every line.
        """
        # A character class instead of ``\s`` keeps the newlines themselves
        # intact, so the line structure of the text is preserved.
        return re.sub(r'[ \t]+$', '', src, flags=re.M)