tg-bot/backend/app/services/pattern_service.py
2025-12-04 09:52:39 +08:00

51 lines
1.8 KiB
Python

from __future__ import annotations
from typing import Dict
import regex
from ..services.config_repo import ConfigRepository
class PatternService:
    """Build compiled, case-insensitive patterns for the configured keywords.

    Each keyword may carry one marker, which is stripped before the rest of
    the keyword is escaped and matched literally. Markers are tested in this
    order, and the first hit wins:

    * ``word**`` -- ``word`` followed by up to 6 letters, ending on a word
      boundary.
    * ``word*``  -- ``word`` followed by up to 3 letters, ending on a word
      boundary.
    * ``##word`` -- ``word`` ending on a word boundary, where the text just
      before it is a word boundary followed by 0 to 6 digits (the digits are
      not part of the match).
    * ``#word``  -- same as ``##word`` with 0 to 3 digits.
    * ``word``   -- ``word`` between two word boundaries.

    Every pattern also tolerates a run of emoji directly in front of the
    keyword text.
    """

    # Zero or more code points from the Emoticons, Miscellaneous Symbols and
    # Pictographs, Transport and Map Symbols, and regional-indicator ranges.
    _EMOJI_PREFIX = (
        r"(?:[\U0001F600-\U0001F64F\U0001F300-\U0001F5FF"
        r"\U0001F680-\U0001F6FF\U0001F1E0-\U0001F1FF])*"
    )

    def __init__(self, repo: ConfigRepository) -> None:
        """Store the repository that ``load_patterns`` reads keywords from."""
        self._repo = repo

    def load_patterns(self) -> Dict[str, regex.Pattern]:
        """Compile one pattern per configured keyword.

        Returns:
            A dict keyed by the keyword exactly as configured (marker
            included) whose values are the compiled patterns. Keywords that
            are blank once their marker is removed, and keywords whose
            pattern fails to compile, are reported on stdout and left out.
        """
        compiled_by_keyword: Dict[str, regex.Pattern] = {}
        for keyword in self._repo.read_keywords():
            source = self._pattern_source(keyword)
            if source is None:
                # A pattern without any literal text would match at almost
                # every word boundary, so such keywords are ignored.
                print(f"Skipping empty keyword '{keyword}'")
                continue
            try:
                # Case-insensitivity is passed as a compile flag rather than
                # as a global inline "(?i)" placed in the middle of the
                # pattern text.
                compiled = regex.compile(source, regex.IGNORECASE)
                compiled_by_keyword[keyword] = compiled
            except regex.error as exc:
                # Skip invalid patterns but continue building others.
                print(f"Invalid regex pattern for '{keyword}': {exc}")
        return compiled_by_keyword

    @classmethod
    def _pattern_source(cls, keyword: str) -> str | None:
        """Return the pattern text for *keyword*, or None if it is blank.

        The marker checks keep their precedence: trailing ``**``, trailing
        ``*``, leading ``##``, leading ``#``, then the plain form.
        """
        emoji = cls._EMOJI_PREFIX
        if keyword.endswith("**"):
            stem, head, tail = keyword[:-2], r"\b" + emoji, r"\p{L}{0,6}\b"
        elif keyword.endswith("*"):
            stem, head, tail = keyword[:-1], r"\b" + emoji, r"\p{L}{0,3}\b"
        elif keyword.startswith("##"):
            # Variable-length lookbehind; supported by the `regex` module.
            stem, head, tail = keyword[2:], emoji + r"(?<=\b\d{0,6})", r"\b"
        elif keyword.startswith("#"):
            stem, head, tail = keyword[1:], emoji + r"(?<=\b\d{0,3})", r"\b"
        else:
            stem, head, tail = keyword, r"\b" + emoji, r"\b"

        # Checked before escaping so blank keywords never reach the engine.
        if not stem.strip():
            return None
        return f"{head}{regex.escape(stem)}{tail}"

    @staticmethod
    def escape_markdown(text: str) -> str:
        """Return *text* with every ``**`` replaced by ``* *``.

        Occurrences are replaced left to right without overlapping, so
        ``***`` becomes ``* **``. Presumably this keeps the text from being
        rendered as Markdown bold -- confirm against the caller.
        """
        return text.replace("**", "* *")