fix: improve Windows local skill file reading (#6028)

* chore: ignore local worktrees * fix: improve Windows local skill file reading * fix: address Windows path and decoding review feedback * fix: simplify shell decoding follow-up * fix: harden sandbox skill prompt metadata * fix: preserve safe sandbox skill summaries * fix: relax sandbox summary sanitization * fix: tighten path sanitization for skill prompts * fix: harden sandbox skill display metadata * fix: preserve Unicode skill paths in prompts * fix: quote Windows skill prompt paths * fix: simplify local shell output decoding * fix: localize Windows prompt path handling * fix: normalize Windows-style skill paths in prompts * fix: align prompt and shell decoding behavior
2026-03-11 23:58:28 +09:00
parent e254caf82d
commit 589cce18af
5 changed files with 455 additions and 20 deletions
@@ -1,6 +1,7 @@
 from __future__ import annotations

 import asyncio
+import locale
 import os
 import shutil
 import subprocess
@@ -52,6 +53,31 @@ def _ensure_safe_path(path: str) -> str:
    return abs_path


+def _decode_shell_output(output: bytes | None) -> str:
+    """Decode captured shell output bytes into text without raising.
+
+    ``None`` yields an empty string. Otherwise strict UTF-8 is tried first,
+    then (only when ``os.name == "nt"``) the ``mbcs``, ``cp936``, ``gbk`` and
+    ``gb18030`` codecs in that order, then the locale's preferred encoding.
+    If every strict attempt fails, the bytes are decoded as UTF-8 with
+    ``errors="replace"``.
+    """
+    if output is None:
+        return ""
+
+    locale_encoding = locale.getpreferredencoding(False) or "utf-8"
+    windows_codecs = ("mbcs", "cp936", "gbk", "gb18030") if os.name == "nt" else ()
+    # Order matters: the first codec that decodes strictly wins.
+    for codec in ("utf-8", *windows_codecs, locale_encoding):
+        try:
+            return output.decode(codec)
+        except (LookupError, UnicodeDecodeError):
+            continue
+
+    # Last resort: lossy UTF-8 so the caller always receives a string.
+    return output.decode("utf-8", errors="replace")
+
+
@dataclass
 class LocalShellComponent(ShellComponent):
    async def exec(
@@ -72,28 +98,32 @@ class LocalShellComponent(ShellComponent):
                run_env.update({str(k): str(v) for k, v in env.items()})
            working_dir = _ensure_safe_path(cwd) if cwd else get_astrbot_root()
            if background:
-                proc = subprocess.Popen(
+                # `command` is intentionally executed through the current shell so
+                # local computer-use behavior matches existing tool semantics.
+                # Safety relies on `_is_safe_command()` and the allowed-root checks.
+                proc = subprocess.Popen(  # noqa: S602  # nosemgrep: python.lang.security.audit.dangerous-subprocess-use-audit
                    command,
                    shell=shell,
                    cwd=working_dir,
                    env=run_env,
-                    stdout=subprocess.PIPE,
-                    stderr=subprocess.PIPE,
-                    text=True,
+                    stdout=subprocess.DEVNULL,
+                    stderr=subprocess.DEVNULL,
                )
                return {"pid": proc.pid, "stdout": "", "stderr": "", "exit_code": None}
-            result = subprocess.run(
+            # `command` is intentionally executed through the current shell so
+            # local computer-use behavior matches existing tool semantics.
+            # Safety relies on `_is_safe_command()` and the allowed-root checks.
+            result = subprocess.run(  # noqa: S602  # nosemgrep: python.lang.security.audit.dangerous-subprocess-use-audit
                command,
                shell=shell,
                cwd=working_dir,
                env=run_env,
                timeout=timeout,
                capture_output=True,
-                text=True,
            )
            return {
-                "stdout": result.stdout,
-                "stderr": result.stderr,
+                "stdout": _decode_shell_output(result.stdout),
+                "stderr": _decode_shell_output(result.stderr),
                "exit_code": result.returncode,
            }

@@ -3,6 +3,7 @@ from __future__ import annotations
 import json
 import os
 import re
+import shlex
 import shutil
 import tempfile
 import zipfile
@@ -79,7 +80,59 @@ def _parse_frontmatter_description(text: str) -> str:

 # Regex for sanitizing paths used in prompt examples — only allow
 # safe path characters to prevent prompt injection via crafted skill paths.
-_SAFE_PATH_RE = re.compile(r"[^A-Za-z0-9_./ -]")
+_SAFE_PATH_RE = re.compile(r"[^\w./ ,()'\-]", re.UNICODE)  # matches any char outside the allowed set
+_WINDOWS_DRIVE_PATH_RE = re.compile(r"^[A-Za-z]:(?:/|\\)")  # drive letter, colon, then slash or backslash
+_WINDOWS_UNC_PATH_RE = re.compile(r"^(//|\\\\)[^/\\]+[/\\][^/\\]+")  # two leading separators, then two segments
+_CONTROL_CHARS_RE = re.compile(r"[\x00-\x1F\x7F]")  # ASCII 0x00-0x1F and 0x7F
+
+
+def _is_windows_prompt_path(path: str) -> bool:
+    """Return True only when ``os.name == "nt"`` and ``path`` is drive or UNC style."""
+    patterns = (_WINDOWS_DRIVE_PATH_RE, _WINDOWS_UNC_PATH_RE)
+    return os.name == "nt" and any(p.match(path) for p in patterns)
+
+
+def _sanitize_prompt_path_for_prompt(path: str) -> str:
+    """Return ``path`` reduced to characters considered safe inside a prompt.
+
+    Drive-letter and UNC style paths get backslashes turned into ``/``; a
+    leading ``X:`` drive prefix is kept verbatim while the remainder loses
+    backticks, control characters and anything ``_SAFE_PATH_RE`` matches.
+    """
+    if not path:
+        return ""
+    has_drive = _WINDOWS_DRIVE_PATH_RE.match(path) is not None
+    if has_drive or _WINDOWS_UNC_PATH_RE.match(path):
+        path = path.replace("\\", "/")
+    # Set "X:" aside so its colon is not run through the character filters.
+    prefix, rest = (path[:2], path[2:]) if has_drive else ("", path)
+    rest = _CONTROL_CHARS_RE.sub("", rest.replace("`", ""))
+    return prefix + _SAFE_PATH_RE.sub("", rest)
+
+
+def _sanitize_prompt_description(description: str) -> str:
+    """Drop backticks, turn control characters into spaces, collapse whitespace."""
+    without_backticks = description.replace("`", "")
+    spaced = _CONTROL_CHARS_RE.sub(" ", without_backticks)
+    return " ".join(spaced.split())
+
+
+def _sanitize_skill_display_name(name: str) -> str:
+    """Return ``name`` if it fully matches ``_SKILL_NAME_RE``, else a placeholder."""
+    is_valid = _SKILL_NAME_RE.fullmatch(name) is not None
+    return name if is_valid else "<invalid_skill_name>"
+
+
+def _build_skill_read_command_example(path: str) -> str:
+    """Build the example shell command that prints the file at ``path``.
+
+    Placeholder path: bare ``cat``; Windows path: ``type "..."``; else quoted ``cat``.
+    """
+    if path == "<skills_root>/<skill_name>/SKILL.md":
+        return "cat " + path
+    if _is_windows_prompt_path(path):
+        return f'type "{path}"'
+    return "cat " + shlex.quote(path)


 def build_skills_prompt(skills: list[SkillInfo]) -> str:
@@ -92,16 +145,37 @@ def build_skills_prompt(skills: list[SkillInfo]) -> str:
    skills_lines: list[str] = []
    example_path = ""
    for skill in skills:
+        display_name = _sanitize_skill_display_name(skill.name)
+
        description = skill.description or "No description"
+        if skill.source_type == "sandbox_only":
+            description = _sanitize_prompt_description(description)
+            if not description:
+                description = "Read SKILL.md for details."
+
+        if skill.source_type == "sandbox_only":
+            rendered_path = (
+                f"{str(SANDBOX_WORKSPACE_ROOT)}/{str(SANDBOX_SKILLS_ROOT)}/"
+                f"{display_name}/SKILL.md"
+            )
+        else:
+            rendered_path = _sanitize_prompt_path_for_prompt(skill.path)
+            if not rendered_path:
+                rendered_path = "<skills_root>/<skill_name>/SKILL.md"
+
        skills_lines.append(
-            f"- **{skill.name}**: {description}\n  File: `{skill.path}`"
+            f"- **{display_name}**: {description}\n  File: `{rendered_path}`"
        )
        if not example_path:
-            example_path = skill.path
+            example_path = rendered_path
    skills_block = "\n".join(skills_lines)
    # Sanitize example_path — it may originate from sandbox cache (untrusted)
-    example_path = _SAFE_PATH_RE.sub("", example_path) if example_path else ""
-    example_path = example_path or "<skills_root>/<skill_name>/SKILL.md"
+    if example_path == "<skills_root>/<skill_name>/SKILL.md":
+        example_path = "<skills_root>/<skill_name>/SKILL.md"
+    else:
+        example_path = _sanitize_prompt_path_for_prompt(example_path)
+        example_path = example_path or "<skills_root>/<skill_name>/SKILL.md"
+    example_command = _build_skill_read_command_example(example_path)

    return (
        "## Skills\n\n"
@@ -119,8 +193,9 @@ def build_skills_prompt(skills: list[SkillInfo]) -> str:
        "*Never silently skip a matching skill* — either use it or briefly "
        "explain why you chose not to.\n"
        "3. **Mandatory grounding** — Before executing any skill you MUST "
-        "first read its `SKILL.md` by running a shell command with the "
-        f"**absolute path** shown above (e.g. `cat {example_path}`). "
+        "first read its `SKILL.md` by running a shell command compatible "
+        "with the current runtime shell and using the **absolute path** "
+        f"shown above (e.g. `{example_command}`). "
        "Never rely on memory or assumptions about a skill's content.\n"
        "4. **Progressive disclosure** — Load only what is directly "
        "referenced from `SKILL.md`:\n"