from __future__ import annotations from typing import Dict import regex from ..services.config_repo import ConfigRepository class PatternService: """Builds regex patterns for the configured keywords.""" def __init__(self, repo: ConfigRepository) -> None: self._repo = repo def load_patterns(self) -> Dict[str, regex.Pattern]: keywords = self._repo.read_keywords() word_patterns: Dict[str, regex.Pattern] = {} emoji_pattern = ( r"[\U0001F600-\U0001F64F\U0001F300-\U0001F5FF" r"\U0001F680-\U0001F6FF\U0001F1E0-\U0001F1FF]" ) for word in keywords: original_word = word if word.endswith("**"): word = word[:-2] pattern = rf"\b(?i)(?:{emoji_pattern})*{regex.escape(word)}\p{{L}}{{0,6}}\b" elif word.endswith("*"): word = word[:-1] pattern = rf"\b(?i)(?:{emoji_pattern})*{regex.escape(word)}\p{{L}}{{0,3}}\b" elif word.startswith("##"): word = word[2:] pattern = rf"(?i)(?:{emoji_pattern})*(?<=\b\d{{0,6}}){regex.escape(word)}\b" elif word.startswith("#"): word = word[1:] pattern = rf"(?i)(?:{emoji_pattern})*(?<=\b\d{{0,3}}){regex.escape(word)}\b" else: pattern = rf"\b(?i)(?:{emoji_pattern})*{regex.escape(word)}\b" try: compiled = regex.compile(pattern) word_patterns[original_word] = compiled except regex.error as exc: # Skip invalid patterns but continue building others. print(f"Invalid regex pattern for '{original_word}': {exc}") return word_patterns @staticmethod def escape_markdown(text: str) -> str: return regex.sub(r"(\*\*)", "* *", text)