51 lines
1.8 KiB
Python
51 lines
1.8 KiB
Python
from __future__ import annotations
|
|
|
|
from typing import Dict
|
|
|
|
import regex
|
|
|
|
from ..services.config_repo import ConfigRepository
|
|
|
|
|
|
class PatternService:
    """Builds regex patterns for the configured keywords.

    Keywords are read from the repository passed to the constructor and may
    carry a marker that widens the match (see :meth:`load_patterns`).
    """

    def __init__(self, repo: ConfigRepository) -> None:
        """Store the repository the keywords are read from.

        Args:
            repo: Object whose ``read_keywords()`` method yields the keyword
                strings to compile.
        """
        self._repo = repo

    def load_patterns(self) -> Dict[str, regex.Pattern]:
        """Compile one pattern per configured keyword.

        Markers are checked in this order, so a suffix marker wins over a
        prefix marker and the two-character form wins over the one-character
        form:

        ==========  =========================================================
        Keyword     Generated pattern (``E`` = optional run of emoji)
        ==========  =========================================================
        ``stem**``  ``\\b E stem`` followed by up to 6 letters, then ``\\b``
        ``stem*``   ``\\b E stem`` followed by up to 3 letters, then ``\\b``
        ``##stem``  ``E stem \\b`` with a look-behind for ``\\b\\d{0,6}``
        ``#stem``   ``E stem \\b`` with a look-behind for ``\\b\\d{0,3}``
        ``stem``    ``\\b E stem \\b``
        ==========  =========================================================

        Every pattern carries the inline ``(?i)`` flag, and the stem is
        passed through ``regex.escape`` so it is matched literally.

        Keywords whose stem is blank once the marker is removed (``""``,
        ``"*"``, ``"#"``, whitespace only, ...) are reported and skipped:
        compiling them would yield a pattern that matches empty positions or
        any short word rather than a specific keyword.

        Returns:
            Mapping from the keyword exactly as configured (markers
            included) to its compiled pattern. Keywords that are blank or
            fail to compile are reported with ``print`` and left out.
        """
        # Character class covering four emoji code-point ranges; kept as
        # ``\U`` escapes so the regex engine, not Python, interprets them.
        emoji_pattern = (
            r"[\U0001F600-\U0001F64F\U0001F300-\U0001F5FF"
            r"\U0001F680-\U0001F6FF\U0001F1E0-\U0001F1FF]"
        )
        # NOTE(review): in the word-boundary variants ``(?i)`` sits after the
        # leading ``\b`` instead of at the very start of the pattern. The
        # text is kept byte-for-byte; confirm against the ``regex`` docs
        # that a mid-pattern inline flag behaves as intended.
        bounded_head = rf"\b(?i)(?:{emoji_pattern})*"
        unbounded_head = rf"(?i)(?:{emoji_pattern})*"

        word_patterns: Dict[str, regex.Pattern] = {}

        for original_word in self._repo.read_keywords():
            if original_word.endswith("**"):
                stem = original_word[:-2]
                head, tail = bounded_head, r"\p{L}{0,6}\b"
            elif original_word.endswith("*"):
                stem = original_word[:-1]
                head, tail = bounded_head, r"\p{L}{0,3}\b"
            elif original_word.startswith("##"):
                stem = original_word[2:]
                head, tail = unbounded_head + r"(?<=\b\d{0,6})", r"\b"
            elif original_word.startswith("#"):
                stem = original_word[1:]
                head, tail = unbounded_head + r"(?<=\b\d{0,3})", r"\b"
            else:
                stem = original_word
                head, tail = bounded_head, r"\b"

            if not stem.strip():
                # A blank stem leaves only the boundary/wildcard scaffolding,
                # which would match almost anywhere in the input.
                print(f"Skipping blank keyword '{original_word}'")
                continue

            pattern = f"{head}{regex.escape(stem)}{tail}"

            try:
                compiled = regex.compile(pattern)
            except regex.error as exc:
                # Skip invalid patterns but continue building others.
                print(f"Invalid regex pattern for '{original_word}': {exc}")
            else:
                word_patterns[original_word] = compiled

        return word_patterns

    @staticmethod
    def escape_markdown(text: str) -> str:
        """Return *text* with every ``**`` replaced by ``* *``.

        Occurrences are replaced left to right without overlap, so ``"***"``
        becomes ``"* **"``. Presumably this keeps the text from being
        rendered as Markdown emphasis; verify against the caller.
        """
        # A fixed literal needs no regex; ``str.replace`` has the same
        # left-to-right, non-overlapping semantics.
        return text.replace("**", "* *")
|