docs: transfer AstrBotDevs/AstrBot-docs to AstrBotDevs/AstrBot (#5960)
* docs: transfer AstrBotDevs/AstrBot-docs to AstrBotDevs/AstrBot * refactor: reorder imports and improve type hints in sync_docs_to_wiki.py and upload_doc_images_to_r2.py * feat: add GitHub Actions workflow to sync wiki with documentation Co-authored-by: Soulter <37870767+Soulter@users.noreply.github.com> Co-authored-by: anka-afk <110004162+anka-afk@users.noreply.github.com> Co-authored-by: zouyonghe <62183434+zouyonghe@users.noreply.github.com> Co-authored-by: shuiping233 <49360196+shuiping233@users.noreply.github.com> Co-authored-by: LIghtJUNction <106986785+LIghtJUNction@users.noreply.github.com> Co-authored-by: Sjshi763 <179909421+Sjshi763@users.noreply.github.com> Co-authored-by: xiewoc <70128845+xiewoc@users.noreply.github.com> Co-authored-by: QingFeng-awa <151742581+QingFeng-awa@users.noreply.github.com> Co-authored-by: PaloMiku <96452465+PaloMiku@users.noreply.github.com> Co-authored-by: shangxueink <138397030+shangxueink@users.noreply.github.com> Co-authored-by: IGCrystal-A <244300990+IGCrystal-A@users.noreply.github.com> Co-authored-by: RC-CHN <67079377+RC-CHN@users.noreply.github.com> Co-authored-by: MC090610 <113341105+MC090610@users.noreply.github.com> Co-authored-by: Waterwzy <196913419+Waterwzy@users.noreply.github.com> Co-authored-by: Lanhuace-Wan <186303160+Lanhuace-Wan@users.noreply.github.com> Co-authored-by: LiAlH4qwq <61769640+LiAlH4qwq@users.noreply.github.com> Co-authored-by: HSOS6 <209910899+HSOS6@users.noreply.github.com> Co-authored-by: th-dd <162813557+th-dd@users.noreply.github.com> Co-authored-by: miaoxutao123 <81676466+miaoxutao123@users.noreply.github.com> Co-authored-by: nuomicici <143102889+nuomicici@users.noreply.github.com> Co-authored-by: nasyt233 <210103278+nasyt233@users.noreply.github.com> Co-authored-by: jlugjb <7426462+jlugjb@users.noreply.github.com> Co-authored-by: Raven95676 <176760093+Raven95676@users.noreply.github.com> Co-authored-by: Futureppo <180109455+Futureppo@users.noreply.github.com> Co-authored-by: MliKiowa 
<61873808+MliKiowa@users.noreply.github.com> Co-authored-by: Fridemn <150212937+Fridemn@users.noreply.github.com> Co-authored-by: BakaCookie520 <138355736+BakaCookie520@users.noreply.github.com> Co-authored-by: YumeYuka <125112916+YumeYuka@users.noreply.github.com> Co-authored-by: xming521 <32786500+xming521@users.noreply.github.com> Co-authored-by: ywh555hhh <121592812+ywh555hhh@users.noreply.github.com> Co-authored-by: stevessr <89645372+stevessr@users.noreply.github.com> Co-authored-by: roeseth <41995115+roeseth@users.noreply.github.com> Co-authored-by: ikun-1145141 <265925499+ikun-1145141@users.noreply.github.com> Co-authored-by: evpeople <54983536+evpeople@users.noreply.github.com> Co-authored-by: Yue-bin <60509781+Yue-bin@users.noreply.github.com> Co-authored-by: W1ndys <109416673+W1ndys@users.noreply.github.com> Co-authored-by: TheFurina <218887821+TheFurina@users.noreply.github.com> Co-authored-by: Seayon <12275933+Seayon@users.noreply.github.com> Co-authored-by: OnlyblackTea <38585636+OnlyblackTea@users.noreply.github.com> Co-authored-by: ocetars <74854972+ocetars@users.noreply.github.com> Co-authored-by: railgun19457 <117180744+railgun19457@users.noreply.github.com> Co-authored-by: JunieXD <107397009+JunieXD@users.noreply.github.com> Co-authored-by: advent259141 <197440256+advent259141@users.noreply.github.com> Co-authored-by: Doge2077 <91442300+Doge2077@users.noreply.github.com> Co-authored-by: Bocity <23430545+Bocity@users.noreply.github.com> Co-authored-by: Aurora-xk <192227833+Aurora-xk@users.noreply.github.com>
This commit is contained in:
@@ -0,0 +1,644 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import posixpath
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path, PurePosixPath
|
||||
|
||||
# First level-one Markdown heading ("# Title") found at the start of any line.
TITLE_RE = re.compile(r"^#\s+(.+)$", re.MULTILINE)

# A complete fenced code block delimited by ``` or ~~~ lines.
FENCED_BLOCK_RE = re.compile(
    r"(^```.*?$.*?^```$|^~~~.*?$.*?^~~~$)",
    re.MULTILINE | re.DOTALL,
)

# A single-backtick inline code span.
INLINE_CODE_RE = re.compile(r"(`[^`]*`)")

# File inside the wiki checkout that lists the files written by the last sync.
MANIFEST_NAME = ".astrbot-wiki-sync-manifest"

# Normalized link targets that must be redirected to a different source file.
SOURCE_ALIASES = {
    "zh/config/providers/start.md": "zh/providers/start.md",
    "en/config/providers/start.md": "en/providers/start.md",
}

# Per-language texts and link targets used for generated index, home and
# sidebar pages. Link entries are (label, wiki page name) pairs.
LANG_CONFIG = {
    "zh": {
        "index_title": "# AstrBot 中文文档",
        "index_intro": "该页面由 `AstrBot-docs` 自动同步到 GitHub Wiki。",
        "index_links": [
            ("关于 AstrBot", "zh-what-is-astrbot"),
            ("社区", "zh-community"),
            ("常见问题", "zh-faq"),
        ],
        "home_intro": "该 Wiki 由 `AstrBot-docs` 自动同步生成。",
        "home_links": [
            ("中文文档入口", "zh-index"),
            ("English Docs", "Home-en"),
        ],
        "sidebar_language_label": "Chinese",
        "sidebar_home_label": "首页",
        "sidebar_home_target": "Home",
        "sidebar_docs_entry_label": "文档入口",
    },
    "en": {
        "index_title": "# AstrBot English Documentation",
        "index_intro": "This page is synchronized automatically from `AstrBot-docs` to the GitHub wiki.",
        "index_links": [
            ("What is AstrBot", "en-what-is-astrbot"),
            ("Community", "en-community"),
            ("FAQ", "en-faq"),
        ],
        "home_intro": "This wiki is synchronized automatically from `AstrBot-docs`.",
        "home_links": [
            ("English docs entry", "en-index"),
            ("中文文档入口", "Home"),
        ],
        "sidebar_language_label": "English",
        "sidebar_home_label": "Home",
        "sidebar_home_target": "Home-en",
        "sidebar_docs_entry_label": "Docs Entry",
    },
}
|
||||
|
||||
|
||||
@dataclass
class PageInfo:
    """Description of one wiki page produced from a docs Markdown file."""

    # Path of the Markdown file relative to the docs root, POSIX separators.
    source_path: str
    # Flat wiki page name (no ".md" extension).
    page_name: str
    # Human readable title shown in the sidebar.
    title: str
    # Final Markdown body written to the wiki.
    content: str
    # First path component of source_path ("zh" or "en").
    language: str
    # Sidebar group label the page is listed under.
    group: str
    # True when the source file is named index.md.
    is_index: bool
|
||||
|
||||
|
||||
@dataclass
class ResolutionResult:
    """Outcome of resolving a link target to a docs source file."""

    # Resolved source path, or None when nothing (or more than one page) matched.
    resolved_path: str | None
    # Sorted candidate paths when the lookup matched several pages.
    ambiguous_matches: tuple[str, ...] = ()
|
||||
|
||||
|
||||
@dataclass
class MarkdownLink:
    """One inline Markdown link located inside a larger text."""

    # Offset of the first character of the link (the "!" for images).
    start: int
    # Offset just past the closing ")".
    end: int
    # Everything from start up to and including the opening "(".
    prefix: str
    # Raw text between the parentheses.
    target: str
    # Closing delimiter of the link.
    suffix: str
|
||||
|
||||
|
||||
@dataclass
class Segment:
    """A slice of Markdown tagged with the kind of content it holds."""

    # One of "text", "inline_code" or "code_block".
    kind: str
    # The exact source text of the slice.
    text: str
|
||||
|
||||
|
||||
def repo_root() -> Path:
    """Return the directory one level above the folder containing this script."""
    script_location = Path(__file__).resolve()
    return script_location.parents[1]
|
||||
|
||||
|
||||
def discover_source_pages(source_root: str) -> tuple[str, ...]:
    """List every Markdown file under the ``zh`` and ``en`` language folders.

    Args:
        source_root: Directory that contains the language folders.

    Returns:
        Sorted POSIX-style paths relative to ``source_root``. Missing
        language folders are skipped.
    """
    docs_root = Path(source_root)
    collected: list[str] = []
    for language_dir in (docs_root / code for code in ("zh", "en")):
        if language_dir.exists():
            collected.extend(
                markdown.relative_to(docs_root).as_posix()
                for markdown in language_dir.rglob("*.md")
            )
    collected.sort()
    return tuple(collected)
|
||||
|
||||
|
||||
def find_label_end(content: str, label_start: int) -> int:
    """Find the ``]`` that closes a link label and is followed by ``(``.

    Args:
        content: Text being scanned.
        label_start: Offset of the opening ``[``.

    Returns:
        Offset of the first ``]`` after ``label_start`` that is not preceded
        by a backslash and whose next non-whitespace character is ``(``;
        ``-1`` when there is none.
    """
    size = len(content)
    cursor = label_start + 1
    while cursor < size:
        bracket = content.find("]", cursor)
        if bracket < 0:
            return -1
        # Whatever happens below, the next search resumes after this bracket.
        cursor = bracket + 1
        if bracket > label_start and content[bracket - 1] == "\\":
            continue
        probe = cursor
        while probe < size and content[probe].isspace():
            probe += 1
        if content[probe : probe + 1] == "(":
            return bracket
    return -1
|
||||
|
||||
|
||||
def find_target_end(content: str, target_start: int) -> int:
    """Find the ``)`` that closes a link target.

    Nested parentheses are balanced and a backslash skips the character that
    follows it.

    Args:
        content: Text being scanned.
        target_start: Offset of the first character after the opening ``(``.

    Returns:
        Offset of the closing ``)``, or ``-1`` when the target never closes.
    """
    nesting = 0
    skip_next = False
    for position in range(target_start, len(content)):
        if skip_next:
            skip_next = False
            continue
        symbol = content[position]
        if symbol == "\\":
            skip_next = True
        elif symbol == "(":
            nesting += 1
        elif symbol == ")":
            if nesting == 0:
                return position
            nesting -= 1
    return -1
|
||||
|
||||
|
||||
def iter_markdown_links(content: str):
    """Yield a ``MarkdownLink`` for every inline ``[label](target)`` link.

    Image links (``![alt](src)``) are included, with the leading ``!`` made
    part of the link span. Reference-style links and HTML are not parsed.
    """
    total = len(content)
    cursor = 0
    while cursor < total:
        bracket_open = content.find("[", cursor)
        if bracket_open == -1:
            return

        bracket_close = find_label_end(content, bracket_open)
        if bracket_close == -1:
            cursor = bracket_open + 1
            continue

        # Any failure from here on resumes scanning right after the label.
        cursor = bracket_close + 1

        paren = bracket_close + 1
        while paren < total and content[paren].isspace():
            paren += 1
        if paren >= total or content[paren] != "(":
            continue

        body_start = paren + 1
        body_end = find_target_end(content, body_start)
        if body_end == -1:
            continue

        is_image = bracket_open > 0 and content[bracket_open - 1] == "!"
        begin = bracket_open - 1 if is_image else bracket_open
        yield MarkdownLink(
            start=begin,
            end=body_end + 1,
            prefix=content[begin:body_start],
            target=content[body_start:body_end],
            suffix=")",
        )
        cursor = body_end + 1
|
||||
|
||||
|
||||
def split_anchor(target: str) -> tuple[str, str]:
    """Split ``target`` at its first ``#``.

    Returns:
        ``(base, anchor)`` where ``anchor`` keeps its leading ``#``, or
        ``(target, "")`` when there is no ``#`` at all.
    """
    base, hash_mark, fragment = target.partition("#")
    if not hash_mark:
        return target, ""
    return base, hash_mark + fragment
|
||||
|
||||
|
||||
def prepare_candidate_path(path: PurePosixPath) -> PurePosixPath:
    """Turn a raw link path into the canonical source path to look up.

    A missing extension defaults to ``.md``, ``.`` and ``..`` components are
    collapsed, and the result is passed through ``SOURCE_ALIASES``.
    """
    with_extension = path if path.suffix else path.with_suffix(".md")
    collapsed = posixpath.normpath(with_extension.as_posix())
    canonical = PurePosixPath(collapsed).as_posix()
    return PurePosixPath(SOURCE_ALIASES.get(canonical, canonical))
|
||||
|
||||
|
||||
def language_for_source(source_path: str) -> str:
    """Return the first path component of ``source_path`` (the language folder)."""
    components = PurePosixPath(source_path).parts
    return components[0]
|
||||
|
||||
|
||||
def parse_doc_target(target: str) -> tuple[str, str] | None:
    """Split an internal documentation link into its path and anchor.

    Args:
        target: Raw link target taken from a Markdown link.

    Returns:
        ``(base_target, anchor)`` for links that point at a docs page (no
        extension or ``.md``), where ``anchor`` is empty or starts with
        ``#``. ``None`` for everything else: same-page anchors,
        protocol-relative URLs, any target with a URI scheme, and links to
        non-Markdown files.
    """
    # Any "scheme:" prefix (http, https, mailto, tel, ftp, ...) in any letter
    # case, as well as "//host/..." URLs, points outside the docs tree.
    if target.startswith(("#", "//")) or re.match(
        r"[A-Za-z][A-Za-z0-9+.\-]*:", target
    ):
        return None

    base_target, anchor = split_anchor(target)
    if not base_target:
        return None

    suffix = PurePosixPath(base_target).suffix.lower()
    if suffix and suffix != ".md":
        return None

    return base_target, anchor
|
||||
|
||||
|
||||
def find_existing_source_path(
    candidate: PurePosixPath,
    source_root: Path,
    source_pages: tuple[str, ...],
) -> ResolutionResult:
    """Locate the source page that ``candidate`` refers to.

    The candidate is accepted as-is when the file exists under
    ``source_root``. Otherwise the part after the language folder is matched
    against the tail of every known page of the same language.

    Returns:
        A result with ``resolved_path`` set on a unique match, with
        ``ambiguous_matches`` filled when several pages match, and with
        neither when nothing matches.
    """
    exact = candidate.as_posix()
    if (source_root / exact).exists():
        return ResolutionResult(resolved_path=exact)

    components = candidate.parts
    if len(components) < 2:
        # Nothing follows the language folder, so there is no tail to match.
        return ResolutionResult(resolved_path=None)

    language = components[0]
    tail = PurePosixPath(*components[1:]).as_posix()
    language_prefix = f"{language}/"
    direct_page = f"{language}/{tail}"
    nested_tail = f"/{tail}"

    hits = [
        page
        for page in source_pages
        if page.startswith(language_prefix)
        and (page == direct_page or page.endswith(nested_tail))
    ]
    if not hits:
        return ResolutionResult(resolved_path=None)
    if len(hits) == 1:
        return ResolutionResult(resolved_path=hits[0])
    return ResolutionResult(
        resolved_path=None,
        ambiguous_matches=tuple(sorted(hits)),
    )
|
||||
|
||||
|
||||
def resolve_link_path(
    base_target: str,
    source_path: str,
    source_root: Path,
    source_pages: tuple[str, ...],
) -> ResolutionResult:
    """Resolve a link path found in ``source_path`` to a docs source file.

    Targets starting with ``/`` are site-absolute: an empty path, ``en`` or
    ``zh`` map to the matching ``index.md``; paths that already start with a
    language folder are used unchanged; anything else is placed under the
    linking page's language (``zh`` unless that language is ``en``). All
    other targets are taken relative to the linking page's directory.
    """
    current_language = language_for_source(source_path)

    if not base_target.startswith("/"):
        raw_candidate = PurePosixPath(source_path).parent / base_target
    else:
        site_path = base_target.lstrip("/")
        if not site_path:
            raw_candidate = PurePosixPath(current_language) / "index.md"
        elif site_path in {"en", "en/", "zh", "zh/"}:
            raw_candidate = PurePosixPath(site_path.rstrip("/")) / "index.md"
        elif site_path.startswith(("en/", "zh/")):
            raw_candidate = PurePosixPath(site_path)
        else:
            fallback_language = "en" if current_language == "en" else "zh"
            raw_candidate = PurePosixPath(fallback_language) / site_path

    return find_existing_source_path(
        prepare_candidate_path(raw_candidate),
        source_root,
        source_pages,
    )
|
||||
|
||||
|
||||
class LinkResolver:
    """Resolve documentation links against one docs tree.

    The list of source pages is discovered once at construction time and
    reused for every lookup.
    """

    def __init__(self, source_root: Path):
        root = Path(source_root)
        self.source_root = root
        self.source_pages = discover_source_pages(str(root))

    def resolve_base_target(
        self, base_target: str, source_path: str
    ) -> ResolutionResult:
        """Resolve a link path (anchor already removed) used in ``source_path``."""
        return resolve_link_path(
            base_target,
            source_path,
            self.source_root,
            self.source_pages,
        )

    def resolve_markdown_target(
        self, target: str, source_path: str
    ) -> tuple[str | None, str]:
        """Resolve a raw Markdown link target.

        Returns:
            ``(resolved source path or None, anchor)``. The anchor is ``""``
            when the target is not an internal docs link.
        """
        parsed = parse_doc_target(target)
        if parsed is None:
            return None, ""
        base_target, anchor = parsed
        resolution = self.resolve_base_target(base_target, source_path)
        return resolution.resolved_path, anchor
|
||||
|
||||
|
||||
def rewrite_link_target(target: str, source_path: str, resolver: LinkResolver) -> str:
    """Return the wiki page name (plus anchor) for an internal docs link.

    Targets that do not resolve to a source page are returned unchanged.
    """
    page_path, fragment = resolver.resolve_markdown_target(target, source_path)
    if page_path is not None:
        return page_name_for_source(page_path) + fragment
    return target
|
||||
|
||||
|
||||
def rewrite_links_in_segment(
    segment: str,
    source_path: str,
    resolver: LinkResolver,
) -> str:
    """Rewrite the target of every inline link found in ``segment``.

    Text outside link targets is copied through untouched.
    """
    pieces: list[str] = []
    cursor = 0
    for link in iter_markdown_links(segment):
        new_target = rewrite_link_target(link.target, source_path, resolver)
        pieces += [segment[cursor : link.start], link.prefix, new_target, link.suffix]
        cursor = link.end
    pieces.append(segment[cursor:])
    return "".join(pieces)
|
||||
|
||||
|
||||
def iter_segments(content: str):
    """Split Markdown into ``Segment`` objects in document order.

    Fenced code blocks become ``code_block`` segments; the text between them
    is split further into ``inline_code`` and ``text`` segments. Empty
    pieces are never yielded.
    """

    def split_inline(chunk: str):
        # Separate backtick code spans from the surrounding plain text.
        cursor = 0
        for code in INLINE_CODE_RE.finditer(chunk):
            if code.start() > cursor:
                yield Segment("text", chunk[cursor : code.start()])
            yield Segment("inline_code", code.group(0))
            cursor = code.end()
        if cursor < len(chunk):
            yield Segment("text", chunk[cursor:])

    consumed = 0
    for block in FENCED_BLOCK_RE.finditer(content):
        yield from split_inline(content[consumed : block.start()])
        yield Segment("code_block", block.group(0))
        consumed = block.end()

    yield from split_inline(content[consumed:])
|
||||
|
||||
|
||||
def rewrite_links(
    content: str,
    source_path: str,
    resolver: LinkResolver,
) -> str:
    """Rewrite internal doc links in ``content``, leaving code untouched.

    Only ``text`` segments are processed; fenced blocks and inline code are
    copied through verbatim.
    """
    return "".join(
        rewrite_links_in_segment(
            part.text,
            source_path=source_path,
            resolver=resolver,
        )
        if part.kind == "text"
        else part.text
        for part in iter_segments(content)
    )
|
||||
|
||||
|
||||
def find_unresolved_doc_links(source_root: Path) -> list[str]:
    """Report internal doc links that do not resolve to a source page.

    Args:
        source_root: Root of the docs tree to scan.

    Returns:
        One ``"<source> -> <target>"`` entry per broken link; ambiguous
        links additionally list the candidate pages. Empty when every link
        resolves.
    """
    unresolved: list[str] = []
    root = Path(source_root)
    resolver = LinkResolver(root)

    for source_path in resolver.source_pages:
        content = (root / source_path).read_text(encoding="utf-8")
        # Fenced code blocks are never rewritten by the sync, so sample links
        # inside them must not be validated either.
        scannable = FENCED_BLOCK_RE.sub("", content)
        for link in iter_markdown_links(scannable):
            parsed_target = parse_doc_target(link.target)
            if parsed_target is None:
                continue  # external or non-Markdown target
            base_target, _ = parsed_target
            resolution = resolver.resolve_base_target(base_target, source_path)
            if resolution.resolved_path is not None:
                continue
            if resolution.ambiguous_matches:
                unresolved.append(
                    f"{source_path} -> {link.target} (ambiguous: {', '.join(resolution.ambiguous_matches)})",
                )
                continue
            unresolved.append(f"{source_path} -> {link.target}")

    return unresolved
|
||||
|
||||
|
||||
def check_unresolved_doc_links(source_root: Path) -> None:
    """Fail when the docs tree contains broken internal links.

    Raises:
        ValueError: Listing every unresolved link, one bullet per line.
    """
    problems = find_unresolved_doc_links(source_root)
    if problems:
        bullet_list = "\n".join("- " + entry for entry in problems)
        raise ValueError(f"Unresolved internal doc links found:\n{bullet_list}")
|
||||
|
||||
|
||||
def page_name_for_source(source_path: str) -> str:
    """Convert a ``.md`` source path into a flat wiki page name.

    The extension is dropped and every ``/`` becomes ``-``.

    Raises:
        ValueError: If ``source_path`` does not end with ``.md``.
    """
    extension = ".md"
    if source_path.endswith(extension):
        return source_path[: -len(extension)].replace("/", "-")
    raise ValueError(f"Unsupported source path: {source_path}")
|
||||
|
||||
|
||||
def strip_frontmatter(content: str) -> str:
    """Remove a leading ``---`` delimited front matter block.

    The block is recognised when the very first line is exactly ``---`` and
    a later line is exactly ``---``. Empty blocks, a closing delimiter on
    the last line of the file and CRLF line endings are all handled.

    Args:
        content: Full text of a Markdown file.

    Returns:
        The text after the front matter with leading blank lines removed, or
        ``content`` unchanged when there is no complete front matter block.
    """
    lines = content.split("\n")
    if len(lines) < 2 or lines[0].rstrip("\r") != "---":
        return content

    for index in range(1, len(lines)):
        # rstrip("\r") lets "---\r\n" delimiters match as well.
        if lines[index].rstrip("\r") == "---":
            return "\n".join(lines[index + 1 :]).lstrip("\r\n")

    # Opening delimiter without a closing one: not front matter.
    return content
|
||||
|
||||
|
||||
def normalize_content(content: str) -> str:
    """Strip trailing whitespace and end non-empty text with one newline.

    Whitespace-only input becomes the empty string.
    """
    body = content.rstrip()
    return f"{body}\n" if body else ""
|
||||
|
||||
|
||||
def default_title_for_source(source_path: str) -> str:
    """Derive a fallback title from the file name: stem with ``-`` as spaces."""
    file_stem = PurePosixPath(source_path).stem
    return " ".join(file_stem.split("-"))
|
||||
|
||||
|
||||
def extract_title(content: str, source_path: str) -> str:
    """Return the page title: the first ``# `` heading outside code fences.

    Args:
        content: Markdown body of the page.
        source_path: Path of the source file, used for the fallback title.

    Returns:
        The stripped heading text, or a title derived from the file name
        when the page has no level-one heading.
    """
    # Drop fenced code blocks first so a "# comment" line inside a shell or
    # Python sample is not mistaken for the page heading.
    prose_only = FENCED_BLOCK_RE.sub("", content)
    match = TITLE_RE.search(prose_only)
    if match:
        return match.group(1).strip()
    return default_title_for_source(source_path)
|
||||
|
||||
|
||||
def build_language_index(language: str, page_names: set[str]) -> str:
    """Generate the index page body for ``language``.

    The page holds the configured title and intro followed by one bullet per
    configured link whose target page is present in ``page_names``.
    """
    settings = LANG_CONFIG[language]
    header = [settings["index_title"], "", settings["index_intro"], ""]
    bullets = [
        f"- [{label}]({page_name})"
        for label, page_name in settings["index_links"]
        if page_name in page_names
    ]
    return normalize_content("\n".join(header + bullets))
|
||||
|
||||
|
||||
def build_home_page(language: str) -> str:
    """Generate the wiki home page body for ``language``."""
    settings = LANG_CONFIG[language]
    header = ["# AstrBot Wiki", "", settings["home_intro"], ""]
    bullets = [f"- [{label}]({target})" for label, target in settings["home_links"]]
    return normalize_content("\n".join(header + bullets))
|
||||
|
||||
|
||||
def build_sidebar(page_infos: list[PageInfo]) -> str:
    """Generate the ``_Sidebar.md`` body listing every non-index page.

    For each language (``zh`` first, then ``en``) the sidebar contains a
    heading, links to the home and docs entry pages, and one bullet per
    group with that group's pages nested beneath it. Pages are ordered by
    source path and groups by name.
    """
    lines: list[str] = []

    for language in ("zh", "en"):
        config = LANG_CONFIG[language]
        lines.extend(
            [
                f"### {config['sidebar_language_label']}",
                "",
                f"- [{config['sidebar_home_label']}]({config['sidebar_home_target']})",
                f"- [{config['sidebar_docs_entry_label']}]({language}-index)",
            ]
        )

        infos = sorted(
            (
                info
                for info in page_infos
                if info.language == language and not info.is_index
            ),
            key=lambda info: info.source_path,
        )
        grouped: dict[str, list[PageInfo]] = {}
        for info in infos:
            grouped.setdefault(info.group, []).append(info)

        for group_name in sorted(grouped):
            lines.append(f"- {group_name}")
            # Two spaces of indentation are the minimum Markdown needs to
            # nest these entries under the group bullet above.
            lines.extend(
                f"  - [{info.title}]({info.page_name})"
                for info in grouped[group_name]
            )

        lines.append("")

    return normalize_content("\n".join(lines))
|
||||
|
||||
|
||||
def build_page_info(
    source_root: Path, source_path: str, resolver: LinkResolver
) -> PageInfo:
    """Read one docs file and prepare it for the wiki.

    Front matter is removed, internal links are rewritten to wiki page names
    and the text is normalized. Files directly inside a language folder go
    into the "Top Level" group; deeper files are grouped by the folder that
    follows the language, with ``-`` shown as spaces.
    """
    raw_text = (source_root / source_path).read_text(encoding="utf-8")
    body = normalize_content(
        rewrite_links(
            strip_frontmatter(raw_text),
            source_path=source_path,
            resolver=resolver,
        )
    )

    doc_path = PurePosixPath(source_path)
    components = doc_path.parts
    if len(components) > 2:
        group = components[1].replace("-", " ")
    else:
        group = "Top Level"

    return PageInfo(
        source_path=source_path,
        page_name=page_name_for_source(source_path),
        title=extract_title(body, source_path),
        content=body,
        language=language_for_source(source_path),
        group=group,
        is_index=doc_path.name == "index.md",
    )
|
||||
|
||||
|
||||
def read_manifest(wiki_root: Path) -> set[str]:
    """Load the file names recorded by the previous sync.

    Returns:
        The non-blank, stripped lines of the manifest file, or an empty set
        when the manifest does not exist.
    """
    manifest_file = wiki_root / MANIFEST_NAME
    if not manifest_file.exists():
        return set()
    stripped_lines = (
        raw.strip() for raw in manifest_file.read_text(encoding="utf-8").splitlines()
    )
    return {entry for entry in stripped_lines if entry}
|
||||
|
||||
|
||||
def write_manifest(wiki_root: Path, file_names: set[str]) -> None:
    """Store ``file_names`` in the manifest file, sorted, one per line."""
    listing = "\n".join(sorted(file_names))
    # Only non-empty listings get a terminating newline.
    text = f"{listing}\n" if listing else listing
    (wiki_root / MANIFEST_NAME).write_text(text, encoding="utf-8")
|
||||
|
||||
|
||||
def write_file(path: Path, content: str) -> None:
    """Write ``content`` to ``path`` as UTF-8, creating parent folders first."""
    folder = path.parent
    folder.mkdir(parents=True, exist_ok=True)
    path.write_text(content, encoding="utf-8")
|
||||
|
||||
|
||||
def sync_docs_to_wiki(source_root: Path, wiki_root: Path) -> None:
    """Regenerate the wiki checkout from the docs tree.

    Every docs page becomes ``<page_name>.md``; ``Home.md``, ``Home-en.md``
    and ``_Sidebar.md`` are generated. Index pages with no content of their
    own receive a generated language index. Files listed in the previous
    manifest but no longer produced are deleted, and the manifest is then
    rewritten with the current file set.
    """
    docs_dir = Path(source_root)
    wiki_dir = Path(wiki_root)
    wiki_dir.mkdir(parents=True, exist_ok=True)
    resolver = LinkResolver(docs_dir)

    pages = [
        build_page_info(docs_dir, relative_path, resolver)
        for relative_path in resolver.source_pages
    ]
    known_names = {page.page_name for page in pages}

    for page in pages:
        if not page.is_index or page.content.strip():
            continue
        # Blank index page: substitute the generated language index.
        page.content = build_language_index(page.language, known_names)
        page.title = extract_title(page.content, page.source_path)

    outputs = {f"{page.page_name}.md": page.content for page in pages}
    outputs.update(
        {
            "Home.md": build_home_page("zh"),
            "Home-en.md": build_home_page("en"),
            "_Sidebar.md": build_sidebar(pages),
        }
    )

    # Remove files written by an earlier sync that are no longer wanted.
    for stale_name in read_manifest(wiki_dir).difference(outputs):
        stale_path = wiki_dir / stale_name
        if stale_path.exists():
            stale_path.unlink()

    for file_name, body in outputs.items():
        write_file(wiki_dir / file_name, body)

    write_manifest(wiki_dir, set(outputs))
|
||||
|
||||
|
||||
def main() -> int:
    """Command line entry point: validate doc links, then sync the wiki.

    Returns:
        ``0`` on success. Broken links surface as the ``ValueError`` raised
        by ``check_unresolved_doc_links``.
    """
    cli = argparse.ArgumentParser(
        description="Sync AstrBot docs content to GitHub wiki pages."
    )
    cli.add_argument(
        "--source-root",
        default=str(repo_root()),
        help="Path to the AstrBot-docs repository root.",
    )
    cli.add_argument(
        "--wiki-root",
        help="Path to the checked out wiki repository.",
    )
    cli.add_argument(
        "--check-links-only",
        action="store_true",
        help="Validate internal doc links without writing wiki files.",
    )
    options = cli.parse_args()

    wiki_missing = not options.wiki_root
    if wiki_missing and not options.check_links_only:
        cli.error("--wiki-root is required unless --check-links-only is set")

    docs_dir = Path(options.source_root)
    check_unresolved_doc_links(docs_dir)

    if not options.check_links_only:
        sync_docs_to_wiki(source_root=docs_dir, wiki_root=Path(options.wiki_root))
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)
|
||||
Executable
+5
@@ -0,0 +1,5 @@
|
||||
#!/usr/bin/env bash
# Launcher: runs upload_doc_images_to_r2.py located next to this script and
# forwards every command line argument to it.
set -euo pipefail

script_dir="$(dirname "$0")"
script_dir="$(cd "$script_dir" && pwd)"

exec python3 "$script_dir/upload_doc_images_to_r2.py" "$@"
|
||||
Executable
+344
@@ -0,0 +1,344 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from collections.abc import Iterable, Sequence
|
||||
from pathlib import Path
|
||||
from urllib.parse import quote
|
||||
|
||||
# Lower-case file extensions that count as uploadable images.
IMAGE_EXTS = {
    ".png",
    ".jpg",
    ".jpeg",
    ".gif",
    ".webp",
    ".svg",
    ".avif",
    ".bmp",
    ".ico",
    ".tif",
    ".tiff",
}

# Markdown image syntax; group 1 is everything between the parentheses.
MD_IMAGE_RE = re.compile(r"!\[[^\]]*\]\(([^)]+)\)")

# HTML <img> tag; group 1 is the quote character, group 2 the src value.
HTML_IMG_RE = re.compile(
    r"<img\b[^>]*\bsrc\s*=\s*([\"'])([^\"']+)\1[^>]*>", re.IGNORECASE
)
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse the command line options of the image upload tool."""
    parser = argparse.ArgumentParser(
        description="Upload all locally referenced images from Markdown docs to Cloudflare R2 using rclone."
    )
    option = parser.add_argument

    # Destination.
    option("--remote", required=True, help="rclone remote name, e.g. r2")
    option("--bucket", default="", help="bucket name in remote path")
    option(
        "--prefix",
        default="docs-images",
        help="destination prefix inside bucket/remote (default: docs-images)",
    )

    # Source and run mode.
    option(
        "--docs-root",
        default=".",
        help="docs root to scan for .md files (default: current directory)",
    )
    option(
        "--dry-run", action="store_true", help="preview uploads without sending files"
    )
    option("--list-only", action="store_true", help="only print matched image files")

    # Optional rewriting of the Markdown sources.
    option(
        "--rewrite-markdown",
        action="store_true",
        help="rewrite local image links in markdown/html to public URL after upload",
    )
    option(
        "--public-base-url",
        default="",
        help="public URL base used for replacement, e.g. https://cdn.example.com/docs",
    )
    option(
        "--backup-ext",
        default=".bak",
        help="backup extension used when rewriting markdown (default: .bak)",
    )
    return parser.parse_args()
|
||||
|
||||
|
||||
def is_local_ref(ref: str) -> bool:
    """Return True unless ``ref`` is a remote, data or mailto reference.

    The check is case-insensitive and also rejects protocol-relative
    ``//host`` URLs.
    """
    remote_prefixes = ("http://", "https://", "//", "data:", "mailto:")
    return not ref.lower().startswith(remote_prefixes)
|
||||
|
||||
|
||||
def parse_md_ref(raw: str) -> str:
    """Extract the file reference from the inside of ``![alt](...)``.

    An ``<...>`` wrapped destination is taken up to the first ``>``;
    otherwise the first whitespace-separated token is used, which drops an
    optional title. Any ``#fragment`` or ``?query`` is then removed.
    """
    text = raw.strip()
    closing = text.find(">")
    if text.startswith("<") and closing != -1:
        text = text[1:closing]
    else:
        text = re.split(r"\s+", text, maxsplit=1)[0]
    without_fragment = text.partition("#")[0]
    return without_fragment.partition("?")[0].strip()
|
||||
|
||||
|
||||
def clean_ref(raw: str) -> str:
    """Normalize an HTML ``src`` value to a bare file reference.

    Surrounding whitespace and angle brackets are removed, then any
    ``#fragment`` or ``?query`` part is dropped.
    """
    text = raw.strip().strip("<>")
    without_fragment = text.partition("#")[0]
    return without_fragment.partition("?")[0].strip()
|
||||
|
||||
|
||||
def resolve_local_ref(md_file: Path, ref: str, root: Path) -> Path | None:
    """Resolve an image reference to an existing image file inside ``root``.

    Args:
        md_file: Markdown file containing the reference.
        ref: Cleaned reference; a leading ``/`` means relative to ``root``,
            anything else is relative to ``md_file``'s directory.
        root: Docs root. It is compared against the resolved path, so it
            should itself be an absolute, resolved path.

    Returns:
        The resolved path, or ``None`` when the reference is empty, cannot
        be resolved, is not a regular file, lies outside ``root`` or does
        not have an image extension.
    """
    if not ref:
        return None

    if ref.startswith("/"):
        candidate = root / ref.lstrip("/")
    else:
        candidate = md_file.parent / ref

    try:
        resolved = candidate.resolve()
        if not resolved.is_file():
            return None
    except (OSError, RuntimeError, ValueError):
        # Symlink loops raise RuntimeError on older Python versions; other
        # unusable paths raise OSError or ValueError. A reference that cannot
        # be resolved is reported as missing instead of aborting the scan.
        return None

    try:
        resolved.relative_to(root)
    except ValueError:
        return None  # points outside the docs root

    if resolved.suffix.lower() not in IMAGE_EXTS:
        return None

    return resolved
|
||||
|
||||
|
||||
def find_markdown_files(root: Path) -> list[Path]:
    """Return all ``*.md`` files below ``root``, sorted, skipping ``node_modules``."""
    return sorted(
        candidate
        for candidate in root.rglob("*.md")
        if "node_modules" not in candidate.parts
    )
|
||||
|
||||
|
||||
def collect_images(
    root: Path, md_files: Sequence[Path]
) -> tuple[set[Path], list[tuple[Path, str]]]:
    """Collect the local image files referenced by ``md_files``.

    Both Markdown image syntax and HTML ``<img>`` tags are scanned.

    Returns:
        ``(images, missing)``: the set of resolved image paths, and a list
        of ``(markdown file, reference)`` pairs for local references that
        could not be resolved to an image under ``root``.
    """
    found: set[Path] = set()
    unresolved: list[tuple[Path, str]] = []

    for md_file in md_files:
        text = md_file.read_text(encoding="utf-8")

        # Markdown references first, then HTML ones, as they appear.
        refs = [parse_md_ref(hit.group(1)) for hit in MD_IMAGE_RE.finditer(text)]
        refs += [clean_ref(hit.group(2)) for hit in HTML_IMG_RE.finditer(text)]

        for ref in refs:
            if not ref or not is_local_ref(ref):
                continue
            image = resolve_local_ref(md_file, ref, root)
            if image:
                found.add(image)
            else:
                unresolved.append((md_file, ref))

    return found, unresolved
|
||||
|
||||
|
||||
def build_target(remote: str, bucket: str, prefix: str) -> str:
    """Compose the rclone destination ``remote:bucket/prefix``.

    The bucket and prefix parts are optional; slashes around ``prefix`` are
    stripped.
    """
    destination = f"{remote}:{bucket}"
    folder = prefix.strip("/")
    return f"{destination}/{folder}" if folder else destination
|
||||
|
||||
|
||||
def rel_object_path(root: Path, image_path: Path, prefix: str) -> str:
    """Return the object key of ``image_path``: prefix plus path relative to ``root``."""
    relative = image_path.relative_to(root).as_posix()
    folder = prefix.strip("/")
    if not folder:
        return relative
    return f"{folder}/{relative}"
|
||||
|
||||
|
||||
def build_public_url(base: str, object_path: str) -> str:
    """Join ``base`` and the percent-encoded ``object_path`` into a URL.

    Trailing slashes on ``base`` are dropped; ``/``, ``-``, ``.``, ``_`` and
    ``~`` in the object path are left unencoded.
    """
    trimmed_base = base.rstrip("/")
    return trimmed_base + "/" + quote(object_path, safe="/-._~")
|
||||
|
||||
|
||||
def run_rclone_upload(
    root: Path, target: str, rel_files: Iterable[str], dry_run: bool
) -> None:
    """Upload the listed files with ``rclone copy --files-from``.

    Args:
        root: Local directory the relative paths are based on.
        target: rclone destination, e.g. ``remote:bucket/prefix``.
        rel_files: Paths relative to ``root`` to upload.
        dry_run: When true, pass ``--dry-run`` to rclone and print the
            command line.

    Raises:
        RuntimeError: If ``rclone`` is not available in ``PATH``.
        subprocess.CalledProcessError: If rclone exits with a non-zero status.
    """
    if shutil.which("rclone") is None:
        raise RuntimeError("rclone not found in PATH")

    tmp_path: Path | None = None
    try:
        # The list file is created inside the try block so it is removed even
        # when writing it fails part-way.
        with tempfile.NamedTemporaryFile(
            mode="w", encoding="utf-8", delete=False
        ) as tmp:
            tmp_path = Path(tmp.name)
            tmp.writelines(f"{rel}\n" for rel in rel_files)

        cmd = [
            "rclone",
            "copy",
            str(root),
            target,
            "--files-from",
            str(tmp_path),
            "--create-empty-src-dirs",
        ]
        if dry_run:
            cmd.append("--dry-run")

        print()
        if dry_run:
            print("Dry-run:", " ".join(cmd))
        else:
            print(f"Uploading to: {target}")

        subprocess.run(cmd, check=True)
    finally:
        if tmp_path is not None:
            tmp_path.unlink(missing_ok=True)
|
||||
|
||||
|
||||
def rewrite_markdown_files(
    root: Path,
    md_files: Sequence[Path],
    image_set: set[Path],
    prefix: str,
    public_base_url: str,
    backup_ext: str,
) -> int:
    """Replace local image references with their public URLs.

    Only references that resolve to a file in ``image_set`` are rewritten.
    Before a changed file is written back, its original text is saved next
    to it with ``backup_ext`` appended (skipped when ``backup_ext`` is
    empty).

    Args:
        root: Docs root used to resolve references and build object keys.
        md_files: Markdown files to process.
        image_set: Images that were uploaded.
        prefix: Destination prefix used for the upload.
        public_base_url: Base URL under which uploaded objects are served.
        backup_ext: Extension appended to the file name for the backup copy.

    Returns:
        Number of Markdown files that were modified.
    """
    changed_count = 0

    def to_url(md_file: Path, raw_ref: str, is_markdown: bool) -> str | None:
        # Public URL for a reference, or None when it must stay untouched.
        ref = parse_md_ref(raw_ref) if is_markdown else clean_ref(raw_ref)
        if not ref or not is_local_ref(ref):
            return None
        resolved = resolve_local_ref(md_file, ref, root)
        if not resolved or resolved not in image_set:
            return None
        obj = rel_object_path(root, resolved, prefix)
        return build_public_url(public_base_url, obj)

    def splice(match: re.Match[str], group: int, url: str) -> str:
        # Replace exactly the captured span. A textual replace could hit an
        # identical string elsewhere in the match (e.g. the alt text), or
        # miss a src attribute written with spaces around "=".
        whole = match.group(0)
        offset = match.start(0)
        return (
            whole[: match.start(group) - offset]
            + url
            + whole[match.end(group) - offset :]
        )

    for md_file in md_files:
        text = md_file.read_text(encoding="utf-8")

        def md_repl(match: re.Match[str], md_file: Path = md_file) -> str:
            url = to_url(md_file, match.group(1), is_markdown=True)
            return splice(match, 1, url) if url else match.group(0)

        def html_repl(match: re.Match[str], md_file: Path = md_file) -> str:
            url = to_url(md_file, match.group(2), is_markdown=False)
            return splice(match, 2, url) if url else match.group(0)

        updated = MD_IMAGE_RE.sub(md_repl, text)
        updated = HTML_IMG_RE.sub(html_repl, updated)

        if updated != text:
            if backup_ext:
                backup_path = md_file.with_suffix(md_file.suffix + backup_ext)
                backup_path.write_text(text, encoding="utf-8")
            md_file.write_text(updated, encoding="utf-8")
            changed_count += 1

    return changed_count
|
||||
|
||||
|
||||
def main() -> int:
    """Command line entry point of the image upload tool.

    Scans the docs root for Markdown files, lists the local images they
    reference, uploads them with rclone and optionally rewrites the
    Markdown to point at the public URLs.

    Returns:
        ``0`` on success, ``1`` when the arguments or docs root are invalid.
    """
    args = parse_args()

    if args.rewrite_markdown and not args.public_base_url:
        print(
            "Error: --public-base-url is required when using --rewrite-markdown",
            file=sys.stderr,
        )
        return 1

    root = Path(args.docs_root).resolve()
    if not root.is_dir():
        print(f"Error: docs root not found: {args.docs_root}", file=sys.stderr)
        return 1

    # No ripgrep check here: the scan is done in Python and never runs `rg`.
    md_files = find_markdown_files(root)
    images, missing = collect_images(root, md_files)

    if not images:
        print("No local image references found in Markdown docs.")
        return 0

    rel_files = sorted(p.relative_to(root).as_posix() for p in images)

    print(f"Found {len(rel_files)} image files:")
    for rel in rel_files:
        print(rel)

    if missing:
        print(file=sys.stderr)
        print(
            f"Warning: {len(missing)} referenced files were not found (showing up to 20):",
            file=sys.stderr,
        )
        for md, ref in missing[:20]:
            print(f"{md}\t{ref}", file=sys.stderr)

    if args.list_only:
        return 0

    target = build_target(args.remote, args.bucket, args.prefix)
    run_rclone_upload(root, target, rel_files, dry_run=args.dry_run)

    # A dry run uploads nothing, so the sources must not be rewritten either.
    if args.rewrite_markdown and not args.dry_run:
        changed = rewrite_markdown_files(
            root=root,
            md_files=md_files,
            image_set=images,
            prefix=args.prefix,
            public_base_url=args.public_base_url,
            backup_ext=args.backup_ext,
        )
        print(f"Rewrote {changed} markdown files.")

    print("Done.")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)
|
||||
@@ -0,0 +1,8 @@
|
||||
```bash
bash scripts/upload-doc-images-to-r2.sh \
  --remote astrbot-docs-s3 \
  --bucket astrbot \
  --prefix docs \
  --rewrite-markdown \
  --public-base-url https://files.astrbot.app
```
|
||||
Reference in New Issue
Block a user