Files
mathlogic/lab1/programm/lexer.py
2025-04-29 13:13:11 +03:00

67 lines
1.9 KiB
Python

from __future__ import annotations
import re
from dataclasses import dataclass
from typing import Callable, Iterable
@dataclass
class Lexem:
    """One lexeme (token) record.

    Attributes:
        text: The piece of source text the lexeme was built from.
        type_name: Name of the lexeme category, stored as a plain string.
        value: String value associated with the lexeme.
    """

    text: str
    type_name: str
    value: str
class LexemeType:
    """A named lexeme category recognised by a regular expression.

    The supplied pattern is wrapped in a capture group and prefixed with
    ``\\s*``, so leading whitespace is skipped and only the lexeme itself
    is captured.
    """

    def __init__(
        self,
        name: str,
        pattern: str,
        value_func: Callable[[str], str] = lambda _: "",
    ):
        """Create a lexeme type.

        Args:
            name: Category name copied into every lexeme this type yields.
            pattern: Regular expression source describing the lexeme.
            value_func: Maps the matched text to the lexeme's value; the
                default yields an empty string for any input.
        """
        self.value_func = value_func
        # Group 1 is the lexeme proper; the ``\s*`` prefix stays outside it.
        self.regex = re.compile(rf"\s*({pattern})")
        self.name = name

    def consume(self, text: str) -> tuple[Lexem | None, str]:
        """Try to read one lexeme of this type from the start of ``text``.

        Args:
            text: Input that has not been tokenised yet.

        Returns:
            ``(lexeme, remaining_text)`` when the pattern matches at the
            beginning of ``text`` (after optional whitespace), otherwise
            ``(None, text)`` with the input unchanged.
        """
        found = self.regex.match(text)
        if found is None:
            return None, text

        body = found.group(1)
        token = Lexem(body, self.name, self.value_func(body))
        return token, text[found.end():]
class Lexer:
    """Splits text into lexemes using an ordered collection of lexeme types.

    At each position the lexeme types are tried in the order given and the
    first one that matches wins. Input that no type recognises is reported
    on standard output and dropped from the result.
    """

    def __init__(
        self,
        lexeme_types: Iterable[LexemeType],
        error_regex: str,
        skip_types: Iterable[str] = (),
    ):
        """Configure the lexer.

        Args:
            lexeme_types: Lexeme types to try, in priority order.
            error_regex: Regular expression source describing how much text
                to discard when no lexeme type matches. Leading whitespace
                is skipped before it is applied.
            skip_types: Type names whose lexemes are consumed but left out
                of the result of :meth:`analyze`.
        """
        # Both collections are walked once per lexeme, so a one-shot iterator
        # (e.g. a generator) would be exhausted after the first token.
        # Materialise them up front.
        self.lexeme_types = list(lexeme_types)
        self.skip_types = frozenset(skip_types)
        self.error_regex = re.compile(r"\s*(" + error_regex + ")")

    def analyze(self, text: str) -> list[Lexem]:
        """Tokenise ``text``.

        Args:
            text: The input to split into lexemes.

        Returns:
            The recognised lexemes in source order, excluding those whose
            type name is in ``skip_types`` and excluding invalid fragments
            (those are only printed).
        """
        lexems: list[Lexem] = []
        while text.strip():
            for lex_type in self.lexeme_types:
                lexem, new_text = lex_type.consume(text)
                # An empty match makes no real progress; accepting it would
                # re-match the same type forever, so treat it as "no match".
                if lexem is None or not lexem.text:
                    continue
                if lexem.type_name not in self.skip_types:
                    lexems.append(lexem)
                text = new_text
                break
            else:
                # No lexeme type matched: report and skip the offending text.
                _, text = self._consume_error(text)
        return lexems

    def _consume_error(self, text: str) -> tuple[Lexem, str]:
        """Report an unrecognised fragment at the start of ``text``.

        Prints a diagnostic containing the offending fragment.

        Args:
            text: Input beginning with something no lexeme type accepted.

        Returns:
            ``(error_lexeme, remaining_text)`` where the lexeme has type
            name ``"ERROR"``. If ``error_regex`` does not match, or matches
            nothing, the whole remaining input is treated as the invalid
            fragment and the remaining text is empty, which guarantees the
            caller always makes progress.
        """
        match = self.error_regex.match(text)
        if match is None or not match.group(1):
            err_text, rest = text.strip(), ""
        else:
            err_text, rest = match.group(1), text[match.end():]
        print(f"Недопустимая лексема: {err_text}")
        return Lexem(err_text, "ERROR", ""), rest