init2
This commit is contained in:
parent
3637f9d9df
commit
11e2fbc6c9
4
.env
Normal file
4
.env
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
# 环境变量示例,复制为 .env 使用
|
||||||
|
# OpenAI 兼容接口的 API Key 与 Base URL(若不需要多模态,可留空)
|
||||||
|
OPENAI_API_KEY=<your-api-key>
|
||||||
|
OPENAI_BASE_URL=https://api.wgetai.com/v1
|
||||||
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
dsl.json
|
||||||
|
autodemo/__pycache__/*.pyc
|
||||||
|
dsl_schema.json
|
||||||
|
sessions/*
|
||||||
5
@AutomationLog.txt
Normal file
5
@AutomationLog.txt
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
|
||||||
|
[WinError -2147221008] 尚未调用 CoInitialize。
|
||||||
|
Can not load UIAutomationCore.dll.
|
||||||
|
1, You may need to install Windows Update KB971513 if your OS is Windows XP, see https://github.com/yinkaisheng/WindowsUpdateKB971513ForIUIAutomation
|
||||||
|
2, You need to use an UIAutomationInitializerInThread object if use uiautomation in a thread, see demos/uiautomation_in_thread.py
|
||||||
11
autodemo/__init__.py
Normal file
11
autodemo/__init__.py
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
# MIT License
|
||||||
|
# Copyright (c) 2024
|
||||||
|
"""轻量级示教式自动化原型。"""
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"schema",
|
||||||
|
"recorder",
|
||||||
|
"llm",
|
||||||
|
"dsl",
|
||||||
|
"executor",
|
||||||
|
]
|
||||||
8
autodemo/__main__.py
Normal file
8
autodemo/__main__.py
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
# MIT License
|
||||||
|
# Copyright (c) 2024
|
||||||
|
"""允许 python -m autodemo 运行 CLI。"""
|
||||||
|
|
||||||
|
from .cli import main
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
91
autodemo/cli.py
Normal file
91
autodemo/cli.py
Normal file
@ -0,0 +1,91 @@
|
|||||||
|
# MIT License
|
||||||
|
# Copyright (c) 2024
|
||||||
|
"""Command line entry point."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from .dsl import load_dsl, save_dsl
|
||||||
|
from .executor import ExecContext, execute_spec
|
||||||
|
from .llm import DummyLLM, LLMClient
|
||||||
|
from .recorder import Recorder
|
||||||
|
from .schema import EventRecord
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_record(args: argparse.Namespace) -> None:
    """Run a recording session and report where it was stored.

    Args:
        args: Parsed CLI namespace providing ``out``, ``hotkey``, ``fps``
            and ``screen``.
    """
    out_root = Path(args.out)
    recorder = Recorder(out_root, hotkey=args.hotkey, fps=args.fps, screen=args.screen)
    print(f"Recording... press {args.hotkey} to stop.")
    saved_to = recorder.start()
    print(f"Session saved to: {saved_to}")
|
||||||
|
|
||||||
|
|
||||||
|
def _load_events(path: Path) -> list[EventRecord]:
    """Load event records from a JSON Lines file.

    Blank lines are skipped; every other line is parsed as JSON and
    validated as an ``EventRecord``.

    Args:
        path: Path of the ``events.jsonl`` file.

    Returns:
        The validated events, in file order.
    """
    # Prefer the pydantic v2 entry point when the model offers it and fall
    # back to the v1 one otherwise, so both major versions are supported.
    validate = getattr(EventRecord, "model_validate", None) or EventRecord.parse_obj
    with path.open("r", encoding="utf-8") as f:
        return [validate(json.loads(line)) for line in f if line.strip()]
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_infer(args: argparse.Namespace) -> None:
    """Generate a DSL from a recorded events file and save it.

    Args:
        args: Parsed CLI namespace providing ``session`` (events file) and
            ``output`` (destination path).
    """
    recorded = _load_events(Path(args.session))
    llm: LLMClient = DummyLLM()
    generated = llm.generate(recorded)
    destination = Path(args.output)
    save_dsl(generated, destination)
    print(f"DSL saved to {destination}")
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_run(args: argparse.Namespace) -> None:
    """Load a DSL file, apply optional parameter overrides and execute it.

    Args:
        args: Parsed CLI namespace providing ``dsl``, ``params`` (a JSON
            string or ``None``), ``allow_title`` and ``dry_run``.
    """
    spec = load_dsl(Path(args.dsl))
    raw_overrides = args.params
    if raw_overrides:
        overrides = json.loads(raw_overrides)
        spec.params.update(overrides)
    context = ExecContext(allow_title=args.allow_title, dry_run=args.dry_run)
    execute_spec(spec, context)
    print("Done")
|
||||||
|
|
||||||
|
|
||||||
|
def build_parser() -> argparse.ArgumentParser:
    """Create the argument parser with the record / infer / run sub-commands."""
    parser = argparse.ArgumentParser(description="示教式自动化原型")
    commands = parser.add_subparsers(dest="command", required=True)

    # record -------------------------------------------------------------
    record = commands.add_parser("record", help="开始录制")
    record_options = (
        ("--out", str, "sessions", "输出目录"),
        ("--hotkey", str, "F9", "停止录制的热键"),
        ("--fps", int, 12, "录屏帧率"),
        ("--screen", int, 0, "屏幕编号,默认主屏"),
    )
    for flag, kind, default, text in record_options:
        record.add_argument(flag, type=kind, default=default, help=text)
    record.set_defaults(func=cmd_record)

    # infer --------------------------------------------------------------
    infer = commands.add_parser("infer", help="LLM 归纳生成 DSL")
    infer.add_argument("--session", type=str, required=True, help="events.jsonl 文件")
    infer.add_argument("--output", type=str, default="flow.yaml", help="输出 DSL 路径")
    infer.set_defaults(func=cmd_infer)

    # run ----------------------------------------------------------------
    run = commands.add_parser("run", help="执行 DSL")
    run.add_argument("--dsl", type=str, required=True, help="DSL YAML 文件")
    run.add_argument("--params", type=str, help="JSON 参数覆盖")
    run.add_argument("--allow-title", type=str, default="记事本|Notepad", help="允许的窗口标题正则")
    run.add_argument("--dry-run", action="store_true", help="仅打印动作不执行")
    run.set_defaults(func=cmd_run)

    return parser
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Parse the command line and dispatch to the selected sub-command."""
    namespace = build_parser().parse_args()
    handler = namespace.func
    handler(namespace)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
24
autodemo/dsl.py
Normal file
24
autodemo/dsl.py
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
# MIT License
|
||||||
|
# Copyright (c) 2024
|
||||||
|
"""DSL 的加载与保存。"""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
from .schema import DSLSpec
|
||||||
|
|
||||||
|
|
||||||
|
def save_dsl(spec: DSLSpec, path: Path) -> None:
    """Write *spec* to *path* as UTF-8 YAML, keeping the field order."""
    with path.open("w", encoding="utf-8") as stream:
        rendered = yaml.safe_dump(spec.dict(), allow_unicode=True, sort_keys=False)
        stream.write(rendered)
|
||||||
|
|
||||||
|
|
||||||
|
def load_dsl(path: Path) -> DSLSpec:
    """Read the YAML document at *path* and validate it as a ``DSLSpec``."""
    document: Dict[str, Any] = yaml.safe_load(path.read_text(encoding="utf-8"))
    return DSLSpec.parse_obj(document)
|
||||||
|
|
||||||
125
autodemo/executor.py
Normal file
125
autodemo/executor.py
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
# MIT License
|
||||||
|
# Copyright (c) 2024
|
||||||
|
"""执行层:根据 DSL 进行 UI 自动化。"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
import uiautomation as auto # type: ignore
|
||||||
|
|
||||||
|
from .schema import DSLSpec
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ExecContext:
    """Execution context passed to the DSL executor."""

    # Regular expression searched against the foreground window's name;
    # execution is refused when it does not match.
    allow_title: str
    # When True, actions are printed instead of being performed.
    dry_run: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
def _match_window(allow_title: str) -> Optional[auto.Control]:
    """Return the foreground control if its name matches *allow_title*.

    Args:
        allow_title: Regular expression searched within the control name.

    Returns:
        The foreground control, or ``None`` when there is no foreground
        control, it has no name, or the name does not match.
    """
    foreground = auto.GetForegroundControl()
    title = None if foreground is None else foreground.Name
    if title is None or not re.search(allow_title, title):
        return None
    return foreground
|
||||||
|
|
||||||
|
|
||||||
|
def _find_control(root: auto.Control, locator: Dict[str, Any], timeout: float) -> Optional[auto.Control]:
    """Find the control described by *locator* beneath *root*.

    The previous implementation compared class-level properties with the
    locator values and called ``auto.AndCondition`` with a ``condition=``
    keyword; the resulting errors were swallowed, so every non-empty
    locator timed out. This version passes the locator as search
    properties instead.

    NOTE(review): relies on uiautomation accepting ``AutomationId`` /
    ``Name`` / ``ClassName`` / ``Compare`` as keyword search properties and
    on ``Control.Exists(maxSearchSeconds, searchIntervalSeconds)`` — confirm
    against the installed uiautomation version.

    Args:
        root: Control to search from.
        locator: Mapping that may contain ``AutomationId``, ``Name``,
            ``ClassName`` and ``ControlType``; empty or ``None`` values are
            ignored.
        timeout: Maximum number of seconds to keep polling.

    Returns:
        The matching control, *root* itself when the locator holds no usable
        criteria, or ``None`` if nothing was found in time.
    """
    search: Dict[str, Any] = {
        key: locator[key]
        for key in ("AutomationId", "Name", "ClassName")
        if locator.get(key)
    }
    wanted_type = locator.get("ControlType")
    if wanted_type:
        # Accept both the short form ("Button") and the full type name
        # ("ButtonControl"); both spellings are produced elsewhere in the
        # project.
        accepted = {wanted_type, f"{wanted_type}Control"}
        search["Compare"] = lambda ctrl, _depth: ctrl.ControlTypeName in accepted

    if not search:
        return root

    deadline = time.time() + timeout
    while True:
        try:
            candidate = root.Control(searchDepth=4, **search)
            # Single immediate lookup; the polling is done by this loop.
            if candidate.Exists(0, 0):
                return candidate
        except Exception:  # noqa: BLE001
            # Best effort: the UI may be mid-transition, so retry until the
            # deadline instead of failing on a transient error.
            pass
        if time.time() >= deadline:
            return None
        time.sleep(0.5)
|
||||||
|
|
||||||
|
|
||||||
|
def _do_action(ctrl: auto.Control, step: Dict[str, Any], dry_run: bool) -> None:
|
||||||
|
"""执行单步动作。"""
|
||||||
|
action = step.get("action")
|
||||||
|
text = step.get("text", "")
|
||||||
|
if dry_run:
|
||||||
|
print(f"[dry-run] {action} -> target={step.get('target')} text={text}")
|
||||||
|
return
|
||||||
|
if action == "click":
|
||||||
|
ctrl.Click()
|
||||||
|
elif action == "type":
|
||||||
|
ctrl.SetFocus()
|
||||||
|
auto.SendKeys(text)
|
||||||
|
elif action == "set_value":
|
||||||
|
try:
|
||||||
|
ctrl.GetValuePattern().SetValue(text)
|
||||||
|
except Exception:
|
||||||
|
ctrl.SendKeys(text)
|
||||||
|
elif action == "assert_exists":
|
||||||
|
assert ctrl is not None, "控件未找到"
|
||||||
|
elif action == "wait_for":
|
||||||
|
# wait_for 仅等待存在
|
||||||
|
time.sleep(float(step.get("waits", {}).get("appear", 1.0)))
|
||||||
|
|
||||||
|
|
||||||
|
def execute_spec(spec: DSLSpec, ctx: ExecContext) -> None:
    """Execute every step of *spec* against the foreground window.

    Steps containing ``for_each`` repeat their nested ``steps`` once per
    item of the named list parameter; steps containing ``if_condition`` run
    ``steps`` or ``else_steps`` depending on the truthiness of the named
    parameter. Any other step is looked up and executed with retries.
    ``{{name}}`` placeholders in a step's ``text`` are replaced with the
    value of ``spec.params[name]``; unknown names are left untouched.

    Args:
        spec: DSL specification (``params``, ``steps``, ``waits``,
            ``retry_policy``).
        ctx: Execution context (window allow-list regex and dry-run flag).

    Raises:
        RuntimeError: The foreground window title does not match
            ``ctx.allow_title``, or a control was not found on the last
            attempt.
        Exception: Whatever the last failed attempt of a step raised.
    """
    root = _match_window(ctx.allow_title)
    if root is None:
        raise RuntimeError(f"前台窗口标题未匹配白名单: {ctx.allow_title}")

    placeholder = re.compile(r"\{\{\s*(\w+)\s*\}\}")

    def render_text(step: Any) -> Any:
        """Return *step* with ``{{param}}`` placeholders in ``text`` filled in."""
        text = step.get("text")
        if not isinstance(text, str) or "{{" not in text:
            return step

        def substitute(match: Any) -> str:
            name = match.group(1)
            if name in spec.params:
                return str(spec.params[name])
            return match.group(0)

        rendered = dict(step)
        rendered["text"] = placeholder.sub(substitute, text)
        return rendered

    def run_action(step: Any) -> None:
        """Locate the step's target and perform its action, with retries."""
        target = step.get("target") or {}
        timeout = float(step.get("waits", {}).get("appear", spec.waits.get("appear", 5.0)))
        retry = step.get("retry_policy", spec.retry_policy)
        # Always try at least once; a policy of 0 used to skip the step.
        attempts = max(1, int(retry.get("max_attempts", 1)))
        interval = float(retry.get("interval", 1.0))
        rendered = render_text(step)
        last_err: Optional[Exception] = None
        for attempt in range(attempts):
            ctrl = _find_control(root, target, timeout)
            try:
                if ctrl is None:
                    raise RuntimeError("控件未找到")
                _do_action(ctrl, rendered, ctx.dry_run)
                return
            except Exception as e:  # noqa: BLE001
                last_err = e
                # Only pause when another attempt will follow.
                if attempt + 1 < attempts:
                    time.sleep(interval)
        if last_err:
            raise last_err

    def run_steps(steps: List[Any]) -> None:
        for step in steps:
            if "for_each" in step:
                # Simple repetition over a list parameter.
                # NOTE(review): the current item is not exposed to the
                # nested steps — confirm whether that is intended.
                iterable = spec.params.get(step["for_each"], [])
                for _ in iterable:
                    run_steps(step.get("steps", []))
            elif "if_condition" in step:
                if spec.params.get(step["if_condition"]):
                    run_steps(step.get("steps", []))
                else:
                    run_steps(step.get("else_steps", []))
            else:
                run_action(step)

    run_steps(spec.steps)
|
||||||
396
autodemo/infer.py
Normal file
396
autodemo/infer.py
Normal file
@ -0,0 +1,396 @@
|
|||||||
|
# MIT License
|
||||||
|
# Copyright (c) 2024
|
||||||
|
"""多模态归纳:读取 session 目录,组装提示,调用 LLM,生成 DSL"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
import requests # type: ignore
|
||||||
|
|
||||||
|
try:
|
||||||
|
# 优先使用 python-dotenv,缺失则退回手动解析
|
||||||
|
from dotenv import load_dotenv # type: ignore
|
||||||
|
except Exception:
|
||||||
|
load_dotenv = None
|
||||||
|
|
||||||
|
from .prompt_templates import SYSTEM_PROMPT, render_user_prompt
|
||||||
|
from .schema import DSLSpec, EventRecord, FramePaths, UISnapshot, UISelector
|
||||||
|
|
||||||
|
|
||||||
|
# --------- Pydantic v1/v2 兼容辅助 ---------
|
||||||
|
def _model_validate(cls, data: Any) -> Any:
|
||||||
|
if hasattr(cls, "model_validate"):
|
||||||
|
return cls.model_validate(data) # type: ignore[attr-defined]
|
||||||
|
return cls.parse_obj(data) # type: ignore[attr-defined]
|
||||||
|
|
||||||
|
|
||||||
|
def _model_dump(obj: Any, **kwargs: Any) -> Dict[str, Any]:
|
||||||
|
if hasattr(obj, "model_dump"):
|
||||||
|
return obj.model_dump(**kwargs) # type: ignore[attr-defined]
|
||||||
|
return obj.dict(**kwargs) # type: ignore[attr-defined]
|
||||||
|
|
||||||
|
|
||||||
|
def _load_env_file() -> None:
    """Load the ``.env`` file located one directory above this module.

    python-dotenv is used when it was importable. Otherwise the file is
    parsed by hand: blank lines, ``#`` comments and lines without ``=`` are
    skipped, an optional ``export `` prefix and matching surrounding quotes
    are removed, and variables already present in the environment are left
    untouched.
    """
    env_path = Path(__file__).resolve().parent.parent / ".env"
    if load_dotenv:
        load_dotenv(env_path)
        return
    if not env_path.exists():
        return
    for raw_line in env_path.read_text(encoding="utf-8").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        if line.startswith("export "):
            line = line[len("export "):].lstrip()
        key, _, val = line.partition("=")
        key = key.strip()
        val = val.strip()
        # Strip one pair of matching quotes so KEY="value" yields value.
        if len(val) >= 2 and val[0] == val[-1] and val[0] in "\"'":
            val = val[1:-1]
        if key:
            os.environ.setdefault(key, val)
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce_assertions(spec_dict: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
"""将 assertions 内的非字符串条目转换为字符串,防止验证失败"""
|
||||||
|
assertions = spec_dict.get("assertions")
|
||||||
|
if isinstance(assertions, list):
|
||||||
|
new_items = []
|
||||||
|
for item in assertions:
|
||||||
|
if isinstance(item, str):
|
||||||
|
new_items.append(item)
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
new_items.append(json.dumps(item, ensure_ascii=False))
|
||||||
|
except Exception:
|
||||||
|
new_items.append(str(item))
|
||||||
|
spec_dict["assertions"] = new_items
|
||||||
|
return spec_dict
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_code_fences(text: str) -> str:
|
||||||
|
"""去除 ```json ... ``` 或 ``` ... ``` 包裹"""
|
||||||
|
stripped = text.strip()
|
||||||
|
if stripped.startswith("```"):
|
||||||
|
parts = stripped.split("```")
|
||||||
|
if len(parts) >= 3:
|
||||||
|
return parts[1].lstrip("json").strip() if parts[1].startswith("json") else parts[1].strip()
|
||||||
|
return stripped
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_steps(spec_dict: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
"""规范化 steps 字段到 schema 支持的动作/字段"""
|
||||||
|
steps = spec_dict.get("steps")
|
||||||
|
if not isinstance(steps, list):
|
||||||
|
return spec_dict
|
||||||
|
normalized = []
|
||||||
|
for step in steps:
|
||||||
|
if not isinstance(step, dict):
|
||||||
|
continue
|
||||||
|
# 将 selector -> target
|
||||||
|
if "target" not in step and "selector" in step:
|
||||||
|
step["target"] = step["selector"]
|
||||||
|
step.pop("selector", None)
|
||||||
|
|
||||||
|
action = step.get("action")
|
||||||
|
# value -> text 归一化,兼容 set_value/type
|
||||||
|
if "value" in step and "text" not in step:
|
||||||
|
step["text"] = step.get("value")
|
||||||
|
step.pop("value", None)
|
||||||
|
|
||||||
|
# 处理 wait_for_window 自定义动作
|
||||||
|
if action == "wait_for_window":
|
||||||
|
title = step.pop("window_title_part", None)
|
||||||
|
timeout = step.pop("timeout", None)
|
||||||
|
step["action"] = "wait_for"
|
||||||
|
step["target"] = step.get("target") or {}
|
||||||
|
if title:
|
||||||
|
step["target"].setdefault("Name", title)
|
||||||
|
step["target"].setdefault("ControlType", "WindowControl")
|
||||||
|
if timeout:
|
||||||
|
secs = float(timeout) / 1000.0
|
||||||
|
step["waits"] = {"appear": secs, "disappear": 5.0}
|
||||||
|
# 若 action 不在允许列表,降级为 assert_exists
|
||||||
|
if step.get("action") not in {"click", "type", "set_value", "assert_exists", "wait_for"}:
|
||||||
|
step["action"] = "assert_exists"
|
||||||
|
|
||||||
|
# 标准化 ControlType 命名
|
||||||
|
tgt = step.get("target", {})
|
||||||
|
if isinstance(tgt, dict) and tgt.get("ControlType") == "Window":
|
||||||
|
tgt["ControlType"] = "WindowControl"
|
||||||
|
normalized.append(step)
|
||||||
|
spec_dict["steps"] = normalized
|
||||||
|
return spec_dict
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------- LLM 抽象 ----------------
|
||||||
|
class LLMClient:
    """Abstract LLM interface; subclasses implement :meth:`generate`."""

    def generate(self, system_prompt: str, user_prompt: str, images: Optional[List[Dict[str, Any]]] = None) -> str:
        """Return the model's raw text reply for the given prompts and images."""
        raise NotImplementedError()
|
||||||
|
|
||||||
|
|
||||||
|
class DummyLLM(LLMClient):
    """Offline, text-only generator driven by simple event heuristics."""

    def generate(self, system_prompt: str, user_prompt: str, images: Optional[List[Dict[str, Any]]] = None) -> str:
        """Build a DSL JSON string from the event summary embedded in *user_prompt*.

        Rules: ``mouse_click`` becomes ``click`` and ``text_input`` becomes
        ``type``. Each text input gets its own parameter (``text``,
        ``text_2``, ...) so later inputs are not replaced by the first one.
        When a window title contains "记事本" and text was entered, a click
        on the "保存" button is appended.

        Args:
            system_prompt: Unused by this implementation.
            user_prompt: Prompt whose tail, after the "事件摘要(JSON):"
                marker, is the JSON list of packed events.
            images: Unused by this implementation.

        Returns:
            The DSL specification serialized as JSON.
        """
        data = json.loads(user_prompt.split("事件摘要(JSON):")[-1])
        steps: List[Dict[str, Any]] = []
        params: Dict[str, Any] = {}
        assertions: List[str] = []
        text_inputs = 0
        saw_notepad = False
        for ev in data:
            ev_type = ev.get("event_type")
            selector = ev.get("uia_selector") or {}
            if ev_type == "mouse_click":
                steps.append({"action": "click", "target": selector})
            elif ev_type == "text_input":
                text_inputs += 1
                # The first input keeps the historical name "text".
                name = "text" if text_inputs == 1 else f"text_{text_inputs}"
                params[name] = ev.get("text") or ""
                steps.append({"action": "type", "target": selector, "text": "{{" + name + "}}"})
            if "记事本" in (ev.get("window_title") or ""):
                saw_notepad = True
        if saw_notepad and text_inputs:
            assertions.append("文本已输入记事本")
            steps.append({"action": "click", "target": {"Name": "保存", "ControlType": "Button"}})
        if not assertions:
            assertions.append("关键控件存在")
        spec = {
            "params": params,
            "steps": steps or [{"action": "assert_exists", "target": {"Name": "dummy"}}],
            "assertions": assertions,
            "retry_policy": {"max_attempts": 2, "interval": 1.0},
            "waits": {"appear": 5.0, "disappear": 5.0},
        }
        return json.dumps(spec, ensure_ascii=False)
|
||||||
|
|
||||||
|
|
||||||
|
class OpenAIVisionClient(LLMClient):
    """Multimodal client for OpenAI-compatible chat-completions endpoints.

    The base URL and model name are configurable.
    """

    def __init__(
        self,
        api_key: str,
        model: str = "gpt-5.1-high",
        base_url: str = "https://api.wgetai.com/v1",
        timeout: float = 120.0,
        retries: int = 1,
    ) -> None:
        """Store the connection settings.

        Args:
            api_key: Bearer token sent in the ``Authorization`` header.
            model: Model name placed in the request payload.
            base_url: API root; a trailing slash is removed.
            timeout: Per-request timeout in seconds.
            retries: Number of extra attempts after the first; negative
                values are treated as 0.
        """
        self.api_key = api_key
        self.model = model
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        self.retries = max(0, retries)

    def generate(self, system_prompt: str, user_prompt: str, images: Optional[List[Dict[str, Any]]] = None) -> str:
        """POST the prompts and images to ``/chat/completions`` and return the reply text.

        Args:
            system_prompt: Content of the system message. It was previously
                ignored in favour of the module-level constant.
            user_prompt: Text part of the user message.
            images: Optional list of dicts whose ``b64`` entry is a
                base64-encoded image, attached as PNG data URLs.

        Returns:
            ``choices[0].message.content`` of the response.

        Raises:
            Exception: The error of the last attempt once every attempt has
                failed.
        """
        headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
        content: List[Dict[str, Any]] = [{"type": "text", "text": user_prompt}]
        for img in images or []:
            content.append({"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img['b64']}"}})
        payload = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": content},
            ],
            "temperature": 0.2,
        }
        url = f"{self.base_url}/chat/completions"
        for attempt in range(self.retries + 1):
            try:
                resp = requests.post(url, headers=headers, json=payload, timeout=self.timeout)
                resp.raise_for_status()
                return resp.json()["choices"][0]["message"]["content"]
            except Exception:  # noqa: BLE001
                # Re-raise only once the retry budget is exhausted.
                if attempt >= self.retries:
                    raise
        # Unreachable: the loop runs at least once and returns or raises.
        raise RuntimeError("LLM 调用失败")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------- 数据加载与压缩 ----------------
|
||||||
|
def _load_events(session_dir: Path) -> List[EventRecord]:
    """Read ``events.jsonl`` from *session_dir* into validated event records.

    Blank lines are ignored; the remaining lines are parsed as JSON in file
    order.
    """
    source = session_dir / "events.jsonl"
    with source.open("r", encoding="utf-8") as handle:
        payloads = (raw.strip() for raw in handle)
        return [_model_validate(EventRecord, json.loads(text)) for text in payloads if text]
|
||||||
|
|
||||||
|
|
||||||
|
def _load_snapshot(path: Optional[str]) -> Optional[UISnapshot]:
|
||||||
|
if not path:
|
||||||
|
return None
|
||||||
|
p = Path(path)
|
||||||
|
if not p.exists():
|
||||||
|
return None
|
||||||
|
with p.open("r", encoding="utf-8") as f:
|
||||||
|
data = json.load(f)
|
||||||
|
return _model_validate(UISnapshot, data)
|
||||||
|
|
||||||
|
|
||||||
|
def _best_image(frame_paths: Optional[FramePaths]) -> Optional[str]:
|
||||||
|
if not frame_paths:
|
||||||
|
return None
|
||||||
|
for cand in [frame_paths.crop_element, frame_paths.crop_mouse, frame_paths.full]:
|
||||||
|
if cand and Path(cand).exists():
|
||||||
|
return cand
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _selector_summary(selector: Optional[UISelector]) -> Dict[str, Any]:
|
||||||
|
if not selector:
|
||||||
|
return {}
|
||||||
|
return {
|
||||||
|
"AutomationId": selector.automation_id,
|
||||||
|
"Name": selector.name,
|
||||||
|
"ClassName": selector.class_name,
|
||||||
|
"ControlType": selector.control_type,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _compress_tree(snapshot: Optional[UISnapshot], selector: Optional[UISelector]) -> List[Dict[str, Any]]:
|
||||||
|
"""压缩 UI 树:保留深度<=2,或与命中控件同名/同类型的兄弟"""
|
||||||
|
if not snapshot:
|
||||||
|
return []
|
||||||
|
nodes = []
|
||||||
|
for node in snapshot.tree:
|
||||||
|
if node.depth <= 2:
|
||||||
|
nodes.append(_model_dump(node, exclude_none=True))
|
||||||
|
else:
|
||||||
|
if selector and (node.name == selector.name or node.control_type == selector.control_type):
|
||||||
|
nodes.append(_model_dump(node, exclude_none=True))
|
||||||
|
return nodes
|
||||||
|
|
||||||
|
|
||||||
|
def _encode_image_b64(path: Optional[str]) -> Optional[str]:
|
||||||
|
if not path:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
with open(path, "rb") as f:
|
||||||
|
return base64.b64encode(f.read()).decode("ascii")
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _pack_events(events: List[EventRecord], multimodal: bool) -> List[Dict[str, Any]]:
|
||||||
|
packed: List[Dict[str, Any]] = []
|
||||||
|
for ev in events:
|
||||||
|
if ev.event_type not in {"mouse_click", "text_input", "window_change"}:
|
||||||
|
continue
|
||||||
|
img_path = _best_image(ev.frame_paths)
|
||||||
|
snapshot = _load_snapshot(ev.ui_snapshot)
|
||||||
|
selector = ev.uia
|
||||||
|
tree = _compress_tree(snapshot, selector)
|
||||||
|
item: Dict[str, Any] = {
|
||||||
|
"event_type": ev.event_type,
|
||||||
|
"ts": ev.ts,
|
||||||
|
"video_time_offset_ms": ev.video_time_offset_ms,
|
||||||
|
"text": ev.text,
|
||||||
|
"window_title": ev.window.title if ev.window else None,
|
||||||
|
"window_process": ev.window.process_name if ev.window else None,
|
||||||
|
"uia_selector": _selector_summary(selector),
|
||||||
|
"uia_tree": tree,
|
||||||
|
"frame_path": img_path,
|
||||||
|
}
|
||||||
|
if multimodal and img_path:
|
||||||
|
b64 = _encode_image_b64(img_path)
|
||||||
|
if b64:
|
||||||
|
item["image_base64"] = b64
|
||||||
|
packed.append(item)
|
||||||
|
return packed
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------- 主入口 ----------------
|
||||||
|
def infer_session(
    session_dir: Path,
    api_key: Optional[str] = None,
    base_url: Optional[str] = None,
    model: str = "gpt-5.1-high",
    timeout: float = 120.0,
    retries: int = 1,
) -> DSLSpec:
    """Read a session directory and infer a ``DSLSpec`` from it.

    With a non-empty *api_key* the events and their images are sent to an
    OpenAI-compatible endpoint; if that call fails, or no key is given, the
    offline ``DummyLLM`` is used instead.

    Args:
        session_dir: Directory containing ``events.jsonl``.
        api_key: API key; ``None`` or an empty string selects the offline
            path.
        base_url: API root; falls back to the built-in default when empty.
        model: Model name for the remote call.
        timeout: Request timeout in seconds.
        retries: Extra attempts for the remote call.

    Returns:
        The validated DSL specification.

    Raises:
        RuntimeError: The model reply is empty, not JSON, or not a JSON
            object.
    """
    events = _load_events(session_dir)
    # An empty key (e.g. a blank .env entry) must not enable multimodal mode.
    multimodal = bool(api_key)
    packed = _pack_events(events, multimodal=multimodal)
    # Images travel as separate image parts, so keep their base64 payload
    # out of the text prompt.
    prompt_events = [{k: v for k, v in item.items() if k != "image_base64"} for item in packed]
    user_prompt = render_user_prompt(prompt_events)

    client: LLMClient
    raw: Optional[str] = None
    use_offline = not multimodal
    if multimodal:
        images_payload = [{"b64": item["image_base64"]} for item in packed if "image_base64" in item]
        client = OpenAIVisionClient(
            api_key=api_key,
            base_url=base_url or "https://api.wgetai.com/v1",
            model=model,
            timeout=timeout,
            retries=retries,
        )
        try:
            raw = client.generate(SYSTEM_PROMPT, user_prompt, images=images_payload)
        except Exception as exc:  # noqa: BLE001
            print(f"[warn] 多模态归纳失败,降级为文本-only(原因: {exc})")
            use_offline = True
    if use_offline:
        client = DummyLLM()
        raw = client.generate(SYSTEM_PROMPT, user_prompt, images=None)

    if not raw or not raw.strip():
        raise RuntimeError("LLM 返回为空,无法解析为 JSON")
    cleaned = _strip_code_fences(raw)
    try:
        spec_dict = json.loads(cleaned)
    except ValueError as exc:
        preview = cleaned[:500]
        raise RuntimeError(f"LLM 返回非 JSON,可见前 500 字符: {preview}") from exc
    if not isinstance(spec_dict, dict):
        # The normalizers below require a mapping.
        raise RuntimeError(f"LLM 返回非 JSON,可见前 500 字符: {cleaned[:500]}")
    spec_dict = _coerce_assertions(spec_dict)
    spec_dict = _normalize_steps(spec_dict)
    return _model_validate(DSLSpec, spec_dict)
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """CLI entry point: infer a DSL from a session directory and write it as JSON.

    The API key and base URL come from the command line first, then from
    ``OPENAI_API_KEY`` / ``OPENAI_BASE_URL`` (after loading ``.env``).
    ``--base-url`` previously had a non-empty default, so the environment
    variable could never take effect.
    """
    parser = argparse.ArgumentParser(description="从 session 目录归纳 DSL(支持多模态)")
    parser.add_argument("--session-dir", type=str, required=True, help="session 目录,包含 events.jsonl / manifest.json / frames / ui_snapshots")
    parser.add_argument("--out", type=str, default="dsl.json", help="输出 DSL JSON 路径")
    parser.add_argument("--api-key", type=str, help="LLM API Key,缺省读取环境变量 OPENAI_API_KEY")
    parser.add_argument("--base-url", type=str, default=None, help="LLM Base URL,缺省读取环境变量 OPENAI_BASE_URL")
    parser.add_argument("--model", type=str, default="gpt-5.1-high", help="LLM 模型名")
    parser.add_argument("--timeout", type=float, default=120.0, help="LLM 请求超时时间(秒)")
    parser.add_argument("--retries", type=int, default=1, help="LLM 请求重试次数(额外重试次数)")
    args = parser.parse_args()

    _load_env_file()

    session_dir = Path(args.session_dir)
    api_key = args.api_key or os.environ.get("OPENAI_API_KEY")
    # infer_session applies the built-in default when this is still empty.
    base_url = args.base_url or os.environ.get("OPENAI_BASE_URL")

    spec = infer_session(
        session_dir,
        api_key=api_key,
        base_url=base_url,
        model=args.model,
        timeout=args.timeout,
        retries=args.retries,
    )
    out_path = Path(args.out)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", encoding="utf-8") as f:
        f.write(json.dumps(_model_dump(spec), ensure_ascii=False, indent=2))
    print(f"DSL 写入: {out_path}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
65
autodemo/llm.py
Normal file
65
autodemo/llm.py
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
# MIT License
|
||||||
|
# Copyright (c) 2024
|
||||||
|
"""LLM 抽象与 Dummy 实现。"""
|
||||||
|
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
from .schema import DSLSpec, EventRecord
|
||||||
|
|
||||||
|
PROMPT_TEMPLATE = """你是一名自动化工程师,请将以下事件序列归纳为可参数化的自动化 DSL。
|
||||||
|
事件序列使用 JSON 描述,每个事件包含 kind、control(AutomationId/Name/ClassName/ControlType/BoundingRect)等。
|
||||||
|
输出 YAML,字段包括:params、steps、assertions、retry_policy、waits,支持 steps 内的 if/else、for_each。
|
||||||
|
输出示例:
|
||||||
|
params:
|
||||||
|
text: "示例参数"
|
||||||
|
steps:
|
||||||
|
- action: click
|
||||||
|
target: {{AutomationId: "15", ControlType: "Edit"}}
|
||||||
|
- action: type
|
||||||
|
target: {{AutomationId: "15"}}
|
||||||
|
text: "{{text}}"
|
||||||
|
assertions:
|
||||||
|
- "输入框非空"
|
||||||
|
retry_policy: {{max_attempts: 2, interval: 1.0}}
|
||||||
|
waits: {{appear: 5.0, disappear: 5.0}}
|
||||||
|
现在请基于输入事件生成 YAML:"""
|
||||||
|
|
||||||
|
|
||||||
|
class LLMClient(ABC):
    """Abstract interface for LLM back-ends."""

    @abstractmethod
    def generate(self, events: List[EventRecord]) -> DSLSpec:
        """Convert a sequence of recorded events into a DSL specification."""
|
||||||
|
|
||||||
|
|
||||||
|
class DummyLLM(LLMClient):
    """Offline dummy back-end implementing a simple rule-based mapping."""

    def generate(self, events: List[EventRecord]) -> DSLSpec:
        """Map click and named key-down events to ``click`` / ``type`` steps.

        When no step results, a single placeholder ``assert_exists`` step is
        emitted instead.
        """

        def to_step(event: EventRecord) -> Optional[Dict[str, Any]]:
            target = event.control.dict(by_alias=True) if event.control else {}
            if event.kind == "mouse_click":
                return {"action": "click", "target": target}
            if event.kind == "key_down":
                # Only key presses that carry a name produce input.
                key_name = event.data.get("name")
                if key_name:
                    return {"action": "type", "target": target, "text": key_name}
            return None

        steps: List[Dict[str, Any]] = [step for step in map(to_step, events) if step is not None]
        if not steps:
            steps = [{"action": "assert_exists", "target": {"Name": "dummy"}}]
        return DSLSpec(params={}, steps=steps, assertions=["dummy generated"])
|
||||||
|
|
||||||
|
|
||||||
|
def render_prompt(events: List[EventRecord]) -> str:
    """Append the YAML-serialized events to the prompt template."""
    serialized = yaml.safe_dump([ev.dict(by_alias=True) for ev in events], allow_unicode=True)
    return PROMPT_TEMPLATE + "\n\n" + serialized
|
||||||
|
|
||||||
32
autodemo/prompt_templates.py
Normal file
32
autodemo/prompt_templates.py
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
# MIT License
|
||||||
|
# Copyright (c) 2024
|
||||||
|
"""多模态归纳提示词模板"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
|
# system 提示:约束模型输出和选择器策略
|
||||||
|
SYSTEM_PROMPT = """
|
||||||
|
你是一名Windows桌面自动化工程师,请将用户示教的关键事件归纳为可参数化的自动化DSL。
|
||||||
|
要求:
|
||||||
|
1) 识别界面场景(如记事本、保存对话框、网页表单),推断用户意图。
|
||||||
|
2) 将易变内容参数化(params),动作抽象成可重放的 click/type/set_value 等。
|
||||||
|
3) 选择器优先级:AutomationId > (Name + ControlType) > (ClassName + ControlType),谨慎使用坐标。
|
||||||
|
4) 输出健壮 waits/assertions,避免竞态。
|
||||||
|
5) 严格输出 JSON,符合 dsl_schema.json。
|
||||||
|
""".strip()
|
||||||
|
|
||||||
|
|
||||||
|
def render_user_prompt(packed_events: List[Dict[str, Any]]) -> str:
    """Build the user prompt: fixed guidance followed by the events as JSON."""
    guide = """
请阅读以下关键事件,生成符合 dsl_schema.json 的 JSON:
- events 已包含点击/文本输入/窗口切换,附带 UIA selector 摘要与可用截图路径。
- 生成 params:将文件名、文本内容等抽象为参数。
- 生成 steps:click/type/set_value/assert_exists/wait_for;需要等待时填写 waits。
- 生成 assertions:确保关键结果(如窗口标题或保存结果)。
仅输出 JSON,不要解释。
""".strip()
    events_json = json.dumps(packed_events, ensure_ascii=False, indent=2)
    return guide + "\n\n事件摘要(JSON):\n" + events_json
|
||||||
445
autodemo/recorder.py
Normal file
445
autodemo/recorder.py
Normal file
@ -0,0 +1,445 @@
|
|||||||
|
# MIT License
|
||||||
|
# Copyright (c) 2024
|
||||||
|
"""Multimodal recorder for Windows desktop sessions."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import List, Optional, Tuple
|
||||||
|
|
||||||
|
import cv2 # type: ignore
|
||||||
|
import numpy as np # type: ignore
|
||||||
|
import psutil # type: ignore
|
||||||
|
import uiautomation as auto # type: ignore
|
||||||
|
from pynput import keyboard, mouse
|
||||||
|
import mss # type: ignore
|
||||||
|
|
||||||
|
from .schema import (
|
||||||
|
EventRecord,
|
||||||
|
FramePaths,
|
||||||
|
MouseInfo,
|
||||||
|
Rect,
|
||||||
|
SessionManifest,
|
||||||
|
UISnapshot,
|
||||||
|
UITreeNode,
|
||||||
|
UISelector,
|
||||||
|
WindowInfo,
|
||||||
|
)
|
||||||
|
from .screen_recorder import ScreenRecorder
|
||||||
|
|
||||||
|
|
||||||
|
class Recorder:
    """Capture UI events, UIA context, screenshots, and screen video.

    A session is written under ``output_dir/<session_id>/`` and holds
    ``events.jsonl``, ``manifest.json``, ``video.mp4``, per-event frames and
    crops, and per-event UI snapshots.

    Events are produced on several threads (the pynput mouse and keyboard
    listeners, the text-flush timer, and the foreground-window watcher), so the
    event list/index and the pending text buffer are each guarded by a lock.
    """

    def __init__(self, output_dir: Path, hotkey: str = "F9", fps: int = 12, screen: int = 0) -> None:
        """Prepare a new session; nothing is written to disk until ``start()``.

        Args:
            output_dir: Parent directory in which the session folder is created.
            hotkey: Key name (compared case-insensitively) that stops recording.
            fps: Frame rate passed to the screen recorder.
            screen: Index into ``mss`` monitors; out-of-range falls back to 0.
        """
        self.output_dir = output_dir
        self.hotkey = hotkey
        self.fps = fps
        self.screen = screen

        self.session_id = str(uuid.uuid4())
        self.session_dir = self.output_dir / self.session_id
        self.events_path = self.session_dir / "events.jsonl"
        self.video_path = self.session_dir / "video.mp4"
        self.frames_dir = self.session_dir / "frames"
        self.frames_crops_dir = self.session_dir / "frames_crops"
        self.ui_snapshots_dir = self.session_dir / "ui_snapshots"

        self.events: List[EventRecord] = []
        self._stop_event = threading.Event()
        # Guards ``events`` and ``_event_index``.
        self._lock = threading.Lock()
        # Guards ``_text_buffer`` (keyboard listener vs. flush timer vs. start()).
        self._text_lock = threading.Lock()
        self._text_buffer: List[str] = []
        self._flush_timer: Optional[threading.Timer] = None
        self._start_perf = 0.0
        self._start_ts = 0.0
        self._last_hwnd: Optional[int] = None
        self._mouse_controller = mouse.Controller()
        self._screen_recorder: Optional[ScreenRecorder] = None
        self._window_thread: Optional[threading.Thread] = None
        self._mouse_listener: Optional[mouse.Listener] = None
        self._keyboard_listener: Optional[keyboard.Listener] = None
        self._monitor: Optional[dict] = None
        self._event_index = 0
        # One UIA initializer token per thread that touches uiautomation.
        self._uia_local = threading.local()
        self._ensure_uia_initialized()

    # Public API ---------------------------------------------------------
    def start(self) -> Path:
        """Record until the hotkey is pressed, then persist the session.

        Returns:
            The session directory containing all recorded artifacts.
        """
        for directory in (
            self.session_dir,
            self.frames_dir,
            self.frames_crops_dir,
            self.ui_snapshots_dir,
        ):
            directory.mkdir(parents=True, exist_ok=True)

        self._start_perf = time.perf_counter()
        self._start_ts = time.time()
        with mss.mss() as sct:
            monitors = sct.monitors
            # Fall back to monitors[0] when the requested index is out of range.
            self._monitor = monitors[self.screen] if 0 <= self.screen < len(monitors) else monitors[0]

        try:
            self._screen_recorder = ScreenRecorder(self.video_path, fps=self.fps, screen=self.screen)
            self._screen_recorder.start()

            self._window_thread = threading.Thread(target=self._watch_window, daemon=True)
            self._window_thread.start()

            self._mouse_listener = mouse.Listener(on_click=self._on_click)
            self._keyboard_listener = keyboard.Listener(on_press=self._on_key_press)
            self._mouse_listener.start()
            self._keyboard_listener.start()

            self._stop_event.wait()
            self._flush_text_buffer()
        finally:
            # Tear down even if setup or the final flush raised, so listeners
            # and the screen recorder are never left running.
            self._stop_event.set()
            self._shutdown()
        return self.session_dir

    # Event handlers -----------------------------------------------------
    def _on_click(self, x: int, y: int, button: mouse.Button, pressed: bool) -> None:
        """Record a ``mouse_click`` event on button press (releases are ignored)."""
        if not pressed or self._stop_event.is_set():
            return
        window_info = self._get_window_info()
        selector = self._hit_test(x, y)
        mouse_info = MouseInfo(x=int(x), y=int(y), button=str(button).split(".")[-1], action="down")
        self._record_event(
            event_type="mouse_click",
            mouse_info=mouse_info,
            text=None,
            uia_selector=selector,
            window=window_info,
        )

    def _on_key_press(self, key: keyboard.Key | keyboard.KeyCode) -> Optional[bool]:
        """Buffer typed characters; stop on the hotkey.

        Returns:
            ``False`` once the hotkey was pressed or recording already stopped
            (passed back to the pynput listener), otherwise ``None``.
        """
        if self._is_hotkey(key):
            self._stop_event.set()
            return False
        if self._stop_event.is_set():
            return False
        with self._text_lock:
            # _key_to_char may pop from the buffer (backspace), so it runs
            # under the same lock as the append.
            ch = self._key_to_char(key)
            if ch is None:
                return None
            self._text_buffer.append(ch)
        self._schedule_flush()
        return None

    # Background watchers ------------------------------------------------
    def _watch_window(self, interval: float = 0.5) -> None:
        """Poll the foreground window and record a ``window_change`` when its handle changes."""
        while not self._stop_event.is_set():
            info = self._get_window_info()
            hwnd = info.hwnd if info else None
            if hwnd and hwnd != self._last_hwnd:
                self._last_hwnd = hwnd
                selector = self._hit_test(*self._current_mouse_position())
                self._record_event(
                    event_type="window_change",
                    mouse_info=self._current_mouse_info(),
                    text=None,
                    uia_selector=selector,
                    window=info,
                )
            time.sleep(interval)

    # Recording helpers --------------------------------------------------
    def _shutdown(self) -> None:
        """Stop timers, listeners, the watcher and the video, then write events and manifest."""
        if self._flush_timer and self._flush_timer.is_alive():
            self._flush_timer.cancel()
        if self._mouse_listener:
            self._mouse_listener.stop()
        if self._keyboard_listener:
            self._keyboard_listener.stop()
        if self._window_thread and self._window_thread.is_alive():
            self._window_thread.join(timeout=1.0)
        if self._screen_recorder:
            self._screen_recorder.stop()
        self._write_events()
        self._write_manifest()

    def _schedule_flush(self) -> None:
        """(Re)start the 0.8 s timer that turns buffered keystrokes into one event."""
        if self._flush_timer and self._flush_timer.is_alive():
            self._flush_timer.cancel()
        self._flush_timer = threading.Timer(0.8, self._flush_text_buffer)
        self._flush_timer.daemon = True
        self._flush_timer.start()

    def _flush_text_buffer(self) -> None:
        """Emit the buffered keystrokes as a single ``text_input`` event, if any."""
        # Take and clear the buffer atomically so a timer flush and the final
        # flush in start() can neither duplicate nor drop text.
        with self._text_lock:
            if not self._text_buffer:
                return
            text = "".join(self._text_buffer)
            self._text_buffer = []
        mouse_info = self._current_mouse_info()
        selector = None
        if mouse_info:
            selector = self._hit_test(mouse_info.x, mouse_info.y)
        window_info = self._get_window_info()
        self._record_event(
            event_type="text_input",
            mouse_info=mouse_info,
            text=text,
            uia_selector=selector,
            window=window_info,
        )

    def _record_event(
        self,
        event_type: str,
        mouse_info: Optional[MouseInfo],
        text: Optional[str],
        uia_selector: Optional[UISelector],
        window: Optional[WindowInfo],
    ) -> None:
        """Capture a frame and UI snapshot for the event and append it to ``events``."""
        # Reserve a unique index under the lock: handlers run on several
        # threads and the index names the frame/snapshot files.
        with self._lock:
            self._event_index += 1
            event_index = self._event_index
        ts = time.time()
        offset_ms = int((time.perf_counter() - self._start_perf) * 1000)
        frame_paths = self._capture_frame(event_type, event_index, mouse_info, uia_selector, window)
        ui_snapshot_path = self._save_ui_snapshot(event_index, uia_selector)

        record = EventRecord(
            ts=ts,
            event_type=event_type,
            window=window,
            mouse=mouse_info,
            text=text,
            uia=uia_selector,
            frame_paths=frame_paths,
            video_time_offset_ms=offset_ms,
            ui_snapshot=ui_snapshot_path,
        )
        with self._lock:
            self.events.append(record)

    def _capture_frame(
        self,
        tag: str,
        event_index: int,
        mouse_info: Optional[MouseInfo],
        uia_selector: Optional[UISelector],
        window: Optional[WindowInfo],
    ) -> Optional[FramePaths]:
        """Save a screenshot for the event plus optional mouse/element crops.

        Returns:
            Paths of the written images, or ``None`` when no monitor is known.
        """
        if not self._monitor:
            return None

        region = self._monitor_region(window)
        with mss.mss() as sct:
            shot = np.array(sct.grab(region))
        frame = cv2.cvtColor(shot, cv2.COLOR_BGRA2BGR)

        full_path = self.frames_dir / f"frame_{event_index:05d}_{tag}.png"
        cv2.imwrite(str(full_path), frame)

        crop_mouse_path = None
        crop_element_path = None
        if mouse_info:
            crop_mouse_path = self._save_mouse_crop(frame, region, mouse_info, event_index)
        if uia_selector and uia_selector.bounding_rect:
            crop_element_path = self._save_element_crop(frame, region, uia_selector.bounding_rect, event_index)

        return FramePaths(
            full=str(full_path),
            crop_mouse=str(crop_mouse_path) if crop_mouse_path else None,
            crop_element=str(crop_element_path) if crop_element_path else None,
        )

    def _save_mouse_crop(self, frame: np.ndarray, region: dict, mouse_info: MouseInfo, event_index: int) -> Optional[Path]:
        """Write an up-to-400x300 crop around the mouse; ``None`` if it falls outside the frame."""
        width, height = frame.shape[1], frame.shape[0]
        # Mouse coordinates are screen-based; the frame starts at the region origin.
        center_x = int(mouse_info.x - region["left"])
        center_y = int(mouse_info.y - region["top"])
        crop_w, crop_h = 400, 300
        x0 = max(0, center_x - crop_w // 2)
        y0 = max(0, center_y - crop_h // 2)
        x1 = min(width, x0 + crop_w)
        y1 = min(height, y0 + crop_h)
        if x1 <= x0 or y1 <= y0:
            return None
        crop = frame[y0:y1, x0:x1]
        path = self.frames_crops_dir / f"frame_{event_index:05d}_mouse.png"
        cv2.imwrite(str(path), crop)
        return path

    def _save_element_crop(self, frame: np.ndarray, region: dict, rect: Rect, event_index: int) -> Optional[Path]:
        """Write the part of ``rect`` that lies inside the frame; ``None`` if nothing overlaps."""
        width, height = frame.shape[1], frame.shape[0]
        x0 = max(0, int(rect.left - region["left"]))
        y0 = max(0, int(rect.top - region["top"]))
        x1 = min(width, int(rect.right - region["left"]))
        y1 = min(height, int(rect.bottom - region["top"]))
        if x1 <= x0 or y1 <= y0:
            return None
        crop = frame[y0:y1, x0:x1]
        path = self.frames_crops_dir / f"frame_{event_index:05d}_element.png"
        cv2.imwrite(str(path), crop)
        return path

    def _monitor_region(self, window: Optional[WindowInfo]) -> dict:
        """Return the grab region: the window rect when it has positive size, else the monitor."""
        if window and window.rect and window.rect.width > 0 and window.rect.height > 0:
            return {
                "left": int(window.rect.left),
                "top": int(window.rect.top),
                "width": int(window.rect.width),
                "height": int(window.rect.height),
            }
        return {
            "left": int(self._monitor["left"]),
            "top": int(self._monitor["top"]),
            "width": int(self._monitor["width"]),
            "height": int(self._monitor["height"]),
        }

    def _save_ui_snapshot(self, event_index: int, selector: Optional[UISelector]) -> Optional[str]:
        """Write the selector and a shallow UI tree to JSON; ``None`` when both are empty."""
        tree = self._capture_tree(max_depth=3)
        if not tree and selector is None:
            return None
        path = self.ui_snapshots_dir / f"ui_{event_index:05d}.json"
        snapshot = UISnapshot(selector=selector, tree=tree)
        with path.open("w", encoding="utf-8") as f:
            json.dump(snapshot.dict(exclude_none=True), f, ensure_ascii=False)
        return str(path)

    # UI helpers ---------------------------------------------------------
    def _capture_tree(self, max_depth: int = 3) -> List[UITreeNode]:
        """Return the foreground window's UI tree in breadth-first order down to ``max_depth``.

        Best effort: nodes whose properties cannot be read are skipped and an
        unavailable foreground control yields an empty list.
        """
        try:
            self._ensure_uia_initialized()
            root = auto.GetForegroundControl()
        except Exception:
            # UIA calls can raise (for example COM errors); a snapshot is optional.
            return []
        if root is None:
            return []

        nodes: List[UITreeNode] = []
        level = [root]
        # Walk level by level; children are only fetched for nodes that are
        # still above max_depth, which avoids UIA calls whose results would be
        # discarded.
        for depth in range(max_depth + 1):
            next_level = []
            for node in level:
                try:
                    nodes.append(
                        UITreeNode(
                            name=node.Name,
                            automation_id=node.AutomationId,
                            class_name=node.ClassName,
                            control_type=node.ControlTypeName,
                            depth=depth,
                        )
                    )
                except Exception:
                    # The element may have gone away while it was being read.
                    continue
                if depth < max_depth:
                    try:
                        next_level.extend(node.GetChildren())
                    except Exception:
                        pass
            if not next_level:
                break
            level = next_level
        return nodes

    def _hit_test(self, x: int, y: int) -> Optional[UISelector]:
        """Return a selector for the control under the point, or ``None`` if unavailable."""
        try:
            self._ensure_uia_initialized()
            ctrl = auto.ControlFromPoint((int(x), int(y)))
            if not ctrl:
                return None
            return self._build_selector(ctrl)
        except Exception:
            # Best effort: a failed UIA lookup must not kill the listener thread.
            return None

    def _get_window_info(self) -> Optional[WindowInfo]:
        """Describe the foreground window, or return ``None`` if it cannot be queried."""
        try:
            self._ensure_uia_initialized()
            ctrl = auto.GetForegroundControl()
            if ctrl is None:
                return None
            rect = getattr(ctrl, "BoundingRectangle", None)
            title = ctrl.Name
            hwnd = getattr(ctrl, "NativeWindowHandle", None) or getattr(ctrl, "Handle", None)
        except Exception:
            # Best effort, consistent with _hit_test: callers treat None as "unknown".
            return None
        rect_model = None
        if rect:
            rect_model = Rect(left=int(rect.left), top=int(rect.top), right=int(rect.right), bottom=int(rect.bottom))
        try:
            process_name = psutil.Process(ctrl.ProcessId).name()
        except Exception:
            process_name = None
        return WindowInfo(
            hwnd=int(hwnd) if hwnd else None,
            title=title,
            process_name=process_name,
            rect=rect_model,
        )

    def _build_selector(self, ctrl: auto.Control) -> UISelector:  # type: ignore
        """Copy the identifying properties and bounding rectangle of ``ctrl`` into a selector."""
        rect = getattr(ctrl, "BoundingRectangle", None)
        rect_model = None
        if rect:
            rect_model = Rect(left=int(rect.left), top=int(rect.top), right=int(rect.right), bottom=int(rect.bottom))
        return UISelector(
            automation_id=getattr(ctrl, "AutomationId", None),
            name=getattr(ctrl, "Name", None),
            class_name=getattr(ctrl, "ClassName", None),
            control_type=getattr(ctrl, "ControlTypeName", None),
            bounding_rect=rect_model,
        )

    # Utility ------------------------------------------------------------
    def _key_to_char(self, key: keyboard.Key | keyboard.KeyCode) -> Optional[str]:
        """Map a key to the text it types, or ``None`` for keys that add no text.

        Backspace removes the last buffered character as a side effect, so the
        caller must hold ``_text_lock``.
        """
        if isinstance(key, keyboard.KeyCode) and key.char:
            return key.char
        if key == keyboard.Key.space:
            return " "
        if key == keyboard.Key.enter:
            return "\n"
        if key == keyboard.Key.backspace:
            if self._text_buffer:
                self._text_buffer.pop()
            return None
        return None

    def _is_hotkey(self, key: keyboard.Key | keyboard.KeyCode) -> bool:
        """Return whether ``key`` matches the configured hotkey (case-insensitive)."""
        target = self.hotkey.lower()
        name = None
        if isinstance(key, keyboard.Key):
            name = (key.name or "").lower()
        elif isinstance(key, keyboard.KeyCode):
            name = (key.char or "").lower()
        return name == target

    def _current_mouse_position(self) -> Tuple[int, int]:
        """Return the current pointer position as integers."""
        pos = self._mouse_controller.position
        return int(pos[0]), int(pos[1])

    def _current_mouse_info(self) -> Optional[MouseInfo]:
        """Return the pointer position as a ``MouseInfo`` without button or action."""
        x, y = self._current_mouse_position()
        return MouseInfo(x=int(x), y=int(y), button=None, action=None)

    def _ensure_uia_initialized(self) -> None:
        """Create the per-thread UIA initializer once for the calling thread."""
        if getattr(self._uia_local, "token", None) is None:
            self._uia_local.token = auto.UIAutomationInitializerInThread()

    # Persistence --------------------------------------------------------
    def _write_events(self) -> None:
        """Write all recorded events to ``events.jsonl``, one JSON object per line."""
        # Snapshot under the lock: the watcher thread may outlive its join timeout.
        with self._lock:
            events = list(self.events)
        with self.events_path.open("w", encoding="utf-8") as f:
            for event in events:
                f.write(json.dumps(event.dict(exclude_none=True), ensure_ascii=False))
                f.write("\n")

    def _write_manifest(self) -> None:
        """Write ``manifest.json`` describing the session and its artifact paths."""
        resolution = self._resolution()
        manifest = SessionManifest(
            session_id=self.session_id,
            start_time=self._start_ts,
            end_time=time.time(),
            resolution=resolution,
            fps=self.fps,
            screen=self.screen,
            video_path=str(self.video_path),
            events_path=str(self.events_path),
            frames_dir=str(self.frames_dir),
            frames_crops_dir=str(self.frames_crops_dir),
            ui_snapshots_dir=str(self.ui_snapshots_dir),
        )
        path = self.session_dir / "manifest.json"
        with path.open("w", encoding="utf-8") as f:
            json.dump(manifest.dict(exclude_none=True), f, ensure_ascii=False, indent=2)

    def _resolution(self) -> str:
        """Return ``"<width>x<height>"`` from the monitor, the UIA screen size, or ``"unknown"``."""
        if self._monitor:
            return f"{self._monitor['width']}x{self._monitor['height']}"
        try:
            width, height = auto.GetScreenSize()
            return f"{width}x{height}"
        except Exception:
            return "unknown"
|
||||||
120
autodemo/schema.py
Normal file
120
autodemo/schema.py
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
# MIT License
|
||||||
|
# Copyright (c) 2024
|
||||||
|
"""Data schemas for recording and DSL components."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, List, Literal, Optional
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class Rect(BaseModel):
    """Axis-aligned rectangle given by its four integer edges."""

    left: int
    top: int
    right: int
    bottom: int

    @property
    def width(self) -> int:
        """Horizontal extent: ``right - left``."""
        return self.right - self.left

    @property
    def height(self) -> int:
        """Vertical extent: ``bottom - top``."""
        return self.bottom - self.top
|
||||||
|
|
||||||
|
|
||||||
|
class WindowInfo(BaseModel):
    """Description of a window; every field is optional and defaults to ``None``."""

    hwnd: Optional[int] = None  # window handle
    title: Optional[str] = None
    process_name: Optional[str] = None
    rect: Optional[Rect] = None
|
||||||
|
|
||||||
|
|
||||||
|
class UISelector(BaseModel):
    """Identifying properties of a UI element; every field defaults to ``None``."""

    automation_id: Optional[str] = None
    name: Optional[str] = None
    class_name: Optional[str] = None
    control_type: Optional[str] = None
    bounding_rect: Optional[Rect] = None
|
||||||
|
|
||||||
|
|
||||||
|
class FramePaths(BaseModel):
    """Paths of the images saved for one event; absent images are ``None``."""

    full: Optional[str] = None  # full screenshot
    crop_mouse: Optional[str] = None  # crop around the mouse position
    crop_element: Optional[str] = None  # crop of the targeted element
|
||||||
|
|
||||||
|
|
||||||
|
class MouseInfo(BaseModel):
    """Mouse position, with the button and action when they are known."""

    x: int
    y: int
    button: Optional[str] = None
    action: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
class UITreeNode(BaseModel):
    """One node of a captured UI tree, with its depth below the root.

    The optional properties default to ``None`` like the other models in this
    module; without an explicit default pydantic v2 would treat them as
    required, while pydantic v1 already defaulted them to ``None``.
    """

    name: Optional[str] = None
    automation_id: Optional[str] = None
    class_name: Optional[str] = None
    control_type: Optional[str] = None
    depth: int
|
||||||
|
|
||||||
|
|
||||||
|
# Closed set of event kinds that can appear in a recording.
EventType = Literal["mouse_click", "text_input", "window_change"]


class EventRecord(BaseModel):
    """One recorded event together with the context captured for it."""

    ts: float  # timestamp of the event
    event_type: EventType
    window: Optional[WindowInfo] = None
    mouse: Optional[MouseInfo] = None
    text: Optional[str] = None
    uia: Optional[UISelector] = None
    frame_paths: Optional[FramePaths] = None
    video_time_offset_ms: Optional[int] = Field(default=None, alias="video_time_offset_ms")
    ui_snapshot: Optional[str] = None  # path of the UI snapshot file, if one was written
|
||||||
|
|
||||||
|
|
||||||
|
class UISnapshot(BaseModel):
    """A selector plus a flattened UI tree; the tree defaults to an empty list."""

    selector: Optional[UISelector] = None
    tree: List[UITreeNode] = Field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
class SessionManifest(BaseModel):
    """Summary of a recording session and the locations of its artifacts."""

    session_id: str
    start_time: float
    end_time: float
    resolution: Optional[str] = None
    fps: int
    screen: int

    # Artifact locations, stored as strings.
    video_path: str
    events_path: str
    frames_dir: str
    frames_crops_dir: str
    ui_snapshots_dir: str
|
||||||
|
|
||||||
|
|
||||||
|
# DSL schemas (kept for executor/infer workflow) ------------------------
|
||||||
|
class DSLAction(BaseModel):
    """A single DSL step: one of the supported actions plus its target and options."""

    action: Literal["click", "type", "set_value", "assert_exists", "wait_for"]
    target: Dict[str, Any] = Field(default_factory=dict)
    text: Optional[str] = None
    params: Dict[str, Any] = Field(default_factory=dict)
    # Per-step overrides; left as None when not given.
    retry_policy: Optional[Dict[str, Any]] = None
    waits: Optional[Dict[str, Any]] = None
|
||||||
|
|
||||||
|
|
||||||
|
class DSLBlock(BaseModel):
    """A named group of steps with optional condition, else-branch and loop fields."""

    name: str
    steps: List[Any] = Field(default_factory=list)
    if_condition: Optional[str] = None
    else_steps: Optional[List[Any]] = None
    for_each: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
class DSLSpec(BaseModel):
    """Top-level DSL document; only ``steps`` is required.

    ``retry_policy`` defaults to two attempts at a 1.0 interval and ``waits``
    to 5.0 for both ``appear`` and ``disappear``.
    """

    params: Dict[str, Any] = Field(default_factory=dict)
    steps: List[Any]
    assertions: List[str] = Field(default_factory=list)
    retry_policy: Dict[str, Any] = Field(default_factory=lambda: {"max_attempts": 2, "interval": 1.0})
    waits: Dict[str, Any] = Field(default_factory=lambda: {"appear": 5.0, "disappear": 5.0})
|
||||||
155
autodemo/screen_recorder.py
Normal file
155
autodemo/screen_recorder.py
Normal file
@ -0,0 +1,155 @@
|
|||||||
|
# MIT License
|
||||||
|
# Copyright (c) 2024
|
||||||
|
"""Screen recording helper with ffmpeg primary and mss+cv2 fallback."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import shutil
|
||||||
|
import subprocess
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Dict, Optional
|
||||||
|
|
||||||
|
import cv2 # type: ignore
|
||||||
|
import mss # type: ignore
|
||||||
|
import numpy as np # type: ignore
|
||||||
|
|
||||||
|
|
||||||
|
class ScreenRecorder:
    """Record the screen to an MP4 file.

    ffmpeg (``gdigrab``) is used when an ``ffmpeg`` executable is on PATH and
    can be launched; otherwise frames are grabbed with mss and encoded with
    OpenCV on a background thread.
    """

    def __init__(self, output_path: Path, fps: int = 12, screen: int = 0) -> None:
        """Store the settings; recording begins with ``start()``.

        Args:
            output_path: Destination video file.
            fps: Target frame rate.
            screen: Index into ``mss`` monitors; out-of-range falls back to 0.
        """
        self.output_path = output_path
        self.fps = fps
        self.screen = screen

        self._proc: Optional[subprocess.Popen] = None
        self._thread: Optional[threading.Thread] = None
        self._stop_event = threading.Event()
        self._monitor: Optional[Dict[str, int]] = None
        self._writer: Optional[cv2.VideoWriter] = None

    @property
    def monitor(self) -> Optional[Dict[str, int]]:
        """The monitor geometry being recorded, or ``None`` before it is chosen."""
        return self._monitor

    def start(self) -> None:
        """Start recording using ffmpeg if available, otherwise mss+cv2."""
        self.output_path.parent.mkdir(parents=True, exist_ok=True)
        if self._start_ffmpeg():
            return
        self._start_mss_fallback()

    def stop(self) -> None:
        """Stop recording gracefully."""
        self._stop_event.set()
        if self._proc:
            try:
                if self._proc.stdin:
                    # Ask ffmpeg to quit via "q" on stdin.
                    self._proc.stdin.write(b"q")
                    self._proc.stdin.flush()
            except (OSError, ValueError):
                # ffmpeg already exited or its stdin is closed; fall through to wait().
                pass
            try:
                self._proc.wait(timeout=5)
            except subprocess.TimeoutExpired:
                self._proc.kill()
                # Reap the killed process so it does not linger.
                self._proc.wait()
            self._proc = None

        capture_thread, self._thread = self._thread, None
        if capture_thread is not None and capture_thread.is_alive():
            capture_thread.join(timeout=5)
        if capture_thread is not None and capture_thread.is_alive():
            # The loop is still writing; it releases its own writer on exit, so
            # releasing it here would pull the writer out from under the thread.
            return
        if self._writer:
            self._writer.release()
            self._writer = None

    def _select_monitor(self, monitors):
        """Return ``monitors[self.screen]``, or ``monitors[0]`` when the index is out of range."""
        if 0 <= self.screen < len(monitors):
            return monitors[self.screen]
        return monitors[0]

    def _start_ffmpeg(self) -> bool:
        """Launch ffmpeg for the selected monitor.

        Returns:
            ``True`` if the process was started, ``False`` if ffmpeg is not on
            PATH or could not be launched.
        """
        if shutil.which("ffmpeg") is None:
            return False

        with mss.mss() as sct:
            self._monitor = self._select_monitor(sct.monitors)

        width = int(self._monitor["width"])
        height = int(self._monitor["height"])
        offset_x = int(self._monitor["left"])
        offset_y = int(self._monitor["top"])

        cmd = [
            "ffmpeg",
            "-y",
            "-f",
            "gdigrab",
            "-framerate",
            str(self.fps),
            "-offset_x",
            str(offset_x),
            "-offset_y",
            str(offset_y),
            "-video_size",
            f"{width}x{height}",
            "-draw_mouse",
            "1",
            "-i",
            "desktop",
            "-pix_fmt",
            "yuv420p",
            "-vcodec",
            "libx264",
            "-preset",
            "ultrafast",
            str(self.output_path),
        ]

        # CREATE_NO_WINDOW only exists on Windows builds of subprocess.
        creation_flags = getattr(subprocess, "CREATE_NO_WINDOW", 0)
        try:
            self._proc = subprocess.Popen(
                cmd,
                stdin=subprocess.PIPE,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                creationflags=creation_flags,
            )
            return True
        except Exception:
            # Any launch failure means "use the fallback recorder".
            self._proc = None
            return False

    def _start_mss_fallback(self) -> None:
        """Start the background thread that grabs and encodes frames."""
        self._stop_event.clear()
        self._thread = threading.Thread(target=self._capture_loop, daemon=True)
        self._thread.start()

    def _capture_loop(self) -> None:
        """Grab frames at roughly ``fps`` until stopped, then release the writer."""
        with mss.mss() as sct:
            self._monitor = self._select_monitor(sct.monitors)

            width = int(self._monitor["width"])
            height = int(self._monitor["height"])
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            writer = cv2.VideoWriter(str(self.output_path), fourcc, self.fps, (width, height))
            self._writer = writer

            frame_interval = 1.0 / max(self.fps, 1)
            next_ts = time.perf_counter()

            try:
                while not self._stop_event.is_set():
                    shot = np.array(sct.grab(self._monitor))
                    frame = cv2.cvtColor(shot, cv2.COLOR_BGRA2BGR)
                    writer.write(frame)

                    # Schedule against the ideal timeline rather than sleeping a
                    # fixed interval after each frame.
                    next_ts += frame_interval
                    sleep_for = max(0.0, next_ts - time.perf_counter())
                    if sleep_for:
                        time.sleep(sleep_for)
            finally:
                # Release the writer even if a grab or encode raised.
                writer.release()
                self._writer = None
|
||||||
12
requirements.txt
Normal file
12
requirements.txt
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
pydantic>=1.10,<3
|
||||||
|
uiautomation>=2.0.20
|
||||||
|
pywin32>=306
|
||||||
|
pyyaml>=6.0.1
|
||||||
|
pytest>=7.4.0
|
||||||
|
pynput>=1.7.6
|
||||||
|
mss>=9.0.1
|
||||||
|
opencv-python>=4.8.0
|
||||||
|
psutil>=5.9.6
|
||||||
|
numpy>=1.26.0
|
||||||
|
requests>=2.31.0
|
||||||
|
python-dotenv>=1.0.0
|
||||||
21
tests/test_dummy_llm.py
Normal file
21
tests/test_dummy_llm.py
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
# MIT License
|
||||||
|
# Copyright (c) 2024
|
||||||
|
"""最小端到端测试:Dummy LLM 推理。"""
|
||||||
|
|
||||||
|
from autodemo.llm import DummyLLM
from autodemo.schema import EventRecord, MouseInfo, Rect, UISelector


def test_dummy_llm_generate() -> None:
    """A single recorded click should become a ``click`` step on that control.

    The event is built from the models that ``autodemo.schema`` defines
    (``EventRecord`` with ``ts``/``event_type``/``mouse``/``uia``).
    """
    llm = DummyLLM()
    ev = EventRecord(
        ts=1.0,
        event_type="mouse_click",
        mouse=MouseInfo(x=1, y=2, button="left", action="down"),
        uia=UISelector(
            automation_id="btn1",
            name="按钮",
            class_name="Button",
            control_type="Button",
            bounding_rect=Rect(left=0, top=0, right=10, bottom=10),
        ),
    )
    spec = llm.generate([ev])
    assert spec.steps[0]["action"] == "click"
    # NOTE(review): assumes DummyLLM maps ``uia.automation_id`` to the
    # "AutomationId" target key -- confirm against autodemo/llm.py.
    assert spec.steps[0]["target"]["AutomationId"] == "btn1"
|
||||||
29
tests/test_executor_dry.py
Normal file
29
tests/test_executor_dry.py
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
# MIT License
|
||||||
|
# Copyright (c) 2024
|
||||||
|
"""最小端到端测试:执行器 dry-run 模式。"""
|
||||||
|
|
||||||
|
from autodemo.executor import ExecContext, execute_spec
|
||||||
|
from autodemo.schema import DSLSpec
|
||||||
|
|
||||||
|
|
||||||
|
def test_executor_dry_run(monkeypatch, capsys) -> None:
    """In dry-run mode the executor reports a one-step spec on stdout."""
    # Replace _match_window and _find_control to avoid any real UI dependency.
    from autodemo import executor

    class _FakeWindow:
        Name = "Notepad"

    def fake_match(title: str):
        return _FakeWindow()

    def fake_find(root, locator, timeout):
        return object()

    for attr, stub in (("_match_window", fake_match), ("_find_control", fake_find)):
        monkeypatch.setattr(executor, attr, stub)

    click_step = {"action": "click", "target": {"Name": "ok"}}
    spec = DSLSpec(steps=[click_step])
    ctx = ExecContext(allow_title=".*", dry_run=True)
    execute_spec(spec, ctx)

    captured = capsys.readouterr()
    assert "dry-run" in captured.out
|
||||||
11
tests/test_schema.py
Normal file
11
tests/test_schema.py
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
# MIT License
|
||||||
|
# Copyright (c) 2024
|
||||||
|
"""最小端到端测试:schema 校验。"""
|
||||||
|
|
||||||
|
from autodemo.schema import DSLSpec
|
||||||
|
|
||||||
|
|
||||||
|
def test_dsl_schema_defaults() -> None:
    """A spec built from steps alone gets the default retry policy and waits."""
    click_step = {"action": "click", "target": {"Name": "btn"}}
    spec = DSLSpec(steps=[click_step])
    assert spec.retry_policy["max_attempts"] == 2
    assert spec.waits["appear"] == 5.0
|
||||||
Loading…
x
Reference in New Issue
Block a user