import re

from rule import Rule, EMPTY_SYMBOL


class RulesParser:
    """
    Parses a text file of rules into a list of ``Rule`` objects.

    Each non-empty line is expected to hold three whitespace-separated
    parts: ``operand``, an arrow (``TRANSFORM`` or ``B_TRANSFORM``) and
    ``target``. ``//`` starts a comment that runs to the end of the line.
    The syntax is described by the regex fragments stored in the class
    variables below.
    """

    # Arrow of an ordinary (non-blocking) rule.
    TRANSFORM = r'->'
    # Arrow of a blocking rule.
    B_TRANSFORM = r'->\|'
    # Separator between rules.
    NEWLINE = '\n'
    # Separator between the parts of a single rule.
    IGNORE = r'\s+'
    # `.*` rather than `.+`: a bare `//` with nothing after it is a comment too.
    COMMENTS = r'//.*$'
    # Regex-escaped form of the empty symbol.
    EMPTY = re.escape(EMPTY_SYMBOL)

    def parse(self, file: str) -> list[Rule]:
        """
        Parsing file according to syntax, specified in class variables.
        Hardcoded for now (and forever)

        :param file: path to the file with rules
        :return: rules in the order they appear in the file
        :raises ValueError: if some line is not a well-formed rule
        """
        # NOTE(review): the file is decoded with the platform default
        # encoding; confirm whether rule files should be read as UTF-8.
        with open(file, 'r') as f:
            text = f.read()
        return [self._parse_rule(line) for line in self._get_lines(text)]

    def _parse_rule(self, line: str) -> Rule:
        """
        tries to parse rule according to set grammar

        :param line: single cleaned line: operand, arrow, target
        :return: parsed rule with empty symbols optimised
        :raises ValueError: if the line has fewer than three parts or the
            arrow is neither TRANSFORM nor B_TRANSFORM
        """
        # re.split() yields empty strings for leading/trailing separators;
        # drop them so indentation can't shift operand/arrow/target.
        tokens = [tok for tok in re.split(self.IGNORE, line) if tok]
        # we always expect 3 parts: operand, arrow, target
        if len(tokens) < 3:
            raise ValueError(f"Can't parse rule \"{line}\": operand, "
                             f"transform symbol and target expected")
        # Anything after the third part is ignored, as before.
        operand, arrow, target = tokens[:3]

        if re.fullmatch(self.TRANSFORM, arrow):
            is_blocking = False
        elif re.fullmatch(self.B_TRANSFORM, arrow):
            is_blocking = True
        else:
            raise ValueError(f"Can't recognize transform symbol. "
                             f"\"{self.TRANSFORM}\" or \"{self.B_TRANSFORM}\""
                             f" expected, but \"{arrow}\" encountered")

        # optimising empty symbol
        return Rule(
            operand=self._optimise_empty(operand),
            target=self._optimise_empty(target),
            is_blocking=is_blocking
        )

    def _optimise_empty(self, string: str) -> str:
        """
        Empty symbol has meaning only while it's the only symbol in the
        string (I hope i'm not wrong right now), so all empty symbols can
        be optimised

        Returns string without EMPTY symbols if deleting them is
        semantically possible, returns a single EMPTY symbol if the string
        consists of EMPTY symbols only.

        NOTE: right now contains naive implementation
        """
        # Group the symbol before repeating it: for a multi-character
        # EMPTY_SYMBOL a bare `+` would bind only to its last character.
        if re.fullmatch(f'(?:{self.EMPTY})+', string):
            return EMPTY_SYMBOL
        return re.sub(self.EMPTY, '', string)

    def _get_lines(self, src: str) -> list[str]:
        """
        Get cleaned lines only with rules to parse

        :param src: raw text of the rules file
        :return: non-empty lines without comments and trailing whitespace
        """
        text = self._strip_lines(self._remove_comments(src))
        return [line for line in text.split(self.NEWLINE) if line]

    def _remove_comments(self, src: str) -> str:
        """
        removes comments from end of lines and returns cleaned text
        """
        # re.M makes `$` match at the end of every line, not only the text.
        return re.sub(self.COMMENTS, '', src, flags=re.M)

    def _strip_lines(self, src: str) -> str:
        """
        Strips whitespaces at the end of lines
        """
        # Any whitespace except the newline itself: tabs left behind by a
        # removed comment must go too, but line breaks have to survive.
        return re.sub(r'[^\S\n]+$', '', src, flags=re.M)