fc33b3eb68
* docs: transfer AstrBotDevs/AstrBot-docs to AstrBotDevs/AstrBot * refactor: reorder imports and improve type hints in sync_docs_to_wiki.py and upload_doc_images_to_r2.py * feat: add GitHub Actions workflow to sync wiki with documentation Co-authored-by: Soulter <37870767+Soulter@users.noreply.github.com> Co-authored-by: anka-afk <110004162+anka-afk@users.noreply.github.com> Co-authored-by: zouyonghe <62183434+zouyonghe@users.noreply.github.com> Co-authored-by: shuiping233 <49360196+shuiping233@users.noreply.github.com> Co-authored-by: LIghtJUNction <106986785+LIghtJUNction@users.noreply.github.com> Co-authored-by: Sjshi763 <179909421+Sjshi763@users.noreply.github.com> Co-authored-by: xiewoc <70128845+xiewoc@users.noreply.github.com> Co-authored-by: QingFeng-awa <151742581+QingFeng-awa@users.noreply.github.com> Co-authored-by: PaloMiku <96452465+PaloMiku@users.noreply.github.com> Co-authored-by: shangxueink <138397030+shangxueink@users.noreply.github.com> Co-authored-by: IGCrystal-A <244300990+IGCrystal-A@users.noreply.github.com> Co-authored-by: RC-CHN <67079377+RC-CHN@users.noreply.github.com> Co-authored-by: MC090610 <113341105+MC090610@users.noreply.github.com> Co-authored-by: Waterwzy <196913419+Waterwzy@users.noreply.github.com> Co-authored-by: Lanhuace-Wan <186303160+Lanhuace-Wan@users.noreply.github.com> Co-authored-by: LiAlH4qwq <61769640+LiAlH4qwq@users.noreply.github.com> Co-authored-by: HSOS6 <209910899+HSOS6@users.noreply.github.com> Co-authored-by: th-dd <162813557+th-dd@users.noreply.github.com> Co-authored-by: miaoxutao123 <81676466+miaoxutao123@users.noreply.github.com> Co-authored-by: nuomicici <143102889+nuomicici@users.noreply.github.com> Co-authored-by: nasyt233 <210103278+nasyt233@users.noreply.github.com> Co-authored-by: jlugjb <7426462+jlugjb@users.noreply.github.com> Co-authored-by: Raven95676 <176760093+Raven95676@users.noreply.github.com> Co-authored-by: Futureppo <180109455+Futureppo@users.noreply.github.com> Co-authored-by: MliKiowa 
<61873808+MliKiowa@users.noreply.github.com> Co-authored-by: Fridemn <150212937+Fridemn@users.noreply.github.com> Co-authored-by: BakaCookie520 <138355736+BakaCookie520@users.noreply.github.com> Co-authored-by: YumeYuka <125112916+YumeYuka@users.noreply.github.com> Co-authored-by: xming521 <32786500+xming521@users.noreply.github.com> Co-authored-by: ywh555hhh <121592812+ywh555hhh@users.noreply.github.com> Co-authored-by: stevessr <89645372+stevessr@users.noreply.github.com> Co-authored-by: roeseth <41995115+roeseth@users.noreply.github.com> Co-authored-by: ikun-1145141 <265925499+ikun-1145141@users.noreply.github.com> Co-authored-by: evpeople <54983536+evpeople@users.noreply.github.com> Co-authored-by: Yue-bin <60509781+Yue-bin@users.noreply.github.com> Co-authored-by: W1ndys <109416673+W1ndys@users.noreply.github.com> Co-authored-by: TheFurina <218887821+TheFurina@users.noreply.github.com> Co-authored-by: Seayon <12275933+Seayon@users.noreply.github.com> Co-authored-by: OnlyblackTea <38585636+OnlyblackTea@users.noreply.github.com> Co-authored-by: ocetars <74854972+ocetars@users.noreply.github.com> Co-authored-by: railgun19457 <117180744+railgun19457@users.noreply.github.com> Co-authored-by: JunieXD <107397009+JunieXD@users.noreply.github.com> Co-authored-by: advent259141 <197440256+advent259141@users.noreply.github.com> Co-authored-by: Doge2077 <91442300+Doge2077@users.noreply.github.com> Co-authored-by: Bocity <23430545+Bocity@users.noreply.github.com> Co-authored-by: Aurora-xk <192227833+Aurora-xk@users.noreply.github.com>
345 lines
9.4 KiB
Python
Executable File
345 lines
9.4 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import re
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
import tempfile
|
|
from collections.abc import Iterable, Sequence
|
|
from pathlib import Path
|
|
from urllib.parse import quote
|
|
|
|
# Lower-case file suffixes (with leading dot) that count as images.
IMAGE_EXTS = {
    f".{name}"
    for name in (
        "png",
        "jpg",
        "jpeg",
        "gif",
        "webp",
        "svg",
        "avif",
        "bmp",
        "ico",
        "tif",
        "tiff",
    )
}

# Markdown image syntax ![alt](target); group 1 is everything between the
# parentheses, including an optional title.
MD_IMAGE_RE = re.compile(r"!\[[^\]]*\]\(([^)]+)\)")

# HTML <img> tag; group 1 is the quote character around the src value and
# group 2 is the src value itself.
HTML_IMG_RE = re.compile(
    r"<img\b[^>]*\bsrc\s*=\s*([\"'])([^\"']+)\1[^>]*>",
    flags=re.I,
)
|
|
|
|
|
|
def parse_args() -> argparse.Namespace:
    """Build the command-line parser and parse the process arguments.

    Returns:
        The parsed namespace with attributes ``remote``, ``bucket``,
        ``prefix``, ``docs_root``, ``dry_run``, ``list_only``,
        ``rewrite_markdown``, ``public_base_url`` and ``backup_ext``.
    """
    cli = argparse.ArgumentParser(
        description="Upload all locally referenced images from Markdown docs to Cloudflare R2 using rclone."
    )

    # (flag, keyword arguments) pairs, registered in this order so the
    # generated --help output keeps the same layout.
    option_specs = (
        ("--remote", {"required": True, "help": "rclone remote name, e.g. r2"}),
        ("--bucket", {"default": "", "help": "bucket name in remote path"}),
        (
            "--prefix",
            {
                "default": "docs-images",
                "help": "destination prefix inside bucket/remote (default: docs-images)",
            },
        ),
        (
            "--docs-root",
            {
                "default": ".",
                "help": "docs root to scan for .md files (default: current directory)",
            },
        ),
        (
            "--dry-run",
            {"action": "store_true", "help": "preview uploads without sending files"},
        ),
        (
            "--list-only",
            {"action": "store_true", "help": "only print matched image files"},
        ),
        (
            "--rewrite-markdown",
            {
                "action": "store_true",
                "help": "rewrite local image links in markdown/html to public URL after upload",
            },
        ),
        (
            "--public-base-url",
            {
                "default": "",
                "help": "public URL base used for replacement, e.g. https://cdn.example.com/docs",
            },
        ),
        (
            "--backup-ext",
            {
                "default": ".bak",
                "help": "backup extension used when rewriting markdown (default: .bak)",
            },
        ),
    )
    for flag, options in option_specs:
        cli.add_argument(flag, **options)

    return cli.parse_args()
|
|
|
|
|
|
def is_local_ref(ref: str) -> bool:
    """Return True unless *ref* starts with a known non-local prefix.

    The check is case-insensitive and treats ``http://``, ``https://``,
    protocol-relative ``//``, ``data:`` and ``mailto:`` references as remote.
    """
    remote_prefixes = ("http://", "https://", "//", "data:", "mailto:")
    return not ref.lower().startswith(remote_prefixes)
|
|
|
|
|
|
def parse_md_ref(raw: str) -> str:
    """Extract the path part from the inside of a Markdown ``(...)`` link.

    An angle-bracketed destination (``<path with spaces>``) is taken up to
    the first ``>``; otherwise the first whitespace-delimited token is used,
    which drops an optional title. Any ``#fragment`` and ``?query`` suffix
    is removed from the result.
    """
    text = raw.strip()
    closing = text.find(">")
    if text.startswith("<") and closing != -1:
        target = text[1:closing]
    else:
        target = re.split(r"\s+", text, maxsplit=1)[0]

    without_fragment = target.partition("#")[0]
    without_query = without_fragment.partition("?")[0]
    return without_query.strip()
|
|
|
|
|
|
def clean_ref(raw: str) -> str:
    """Normalize an HTML ``src`` value to a bare path.

    Surrounding whitespace and angle brackets are trimmed, then any
    ``#fragment`` and ``?query`` suffix is removed.
    """
    trimmed = raw.strip().strip("<>")
    path_part = trimmed.partition("#")[0]
    path_part = path_part.partition("?")[0]
    return path_part.strip()
|
|
|
|
|
|
def resolve_local_ref(md_file: Path, ref: str, root: Path) -> Path | None:
    """Resolve an image reference found in *md_file* to a file under *root*.

    A reference starting with ``/`` is taken relative to *root*; any other
    reference is taken relative to the directory containing *md_file*. The
    reference is tried as written first and then, if that fails, in its
    percent-decoded form, so links such as ``my%20pic.png`` match a file
    named ``my pic.png``.

    Args:
        md_file: Markdown file that contains the reference.
        ref: Cleaned reference (no title, query string or fragment).
        root: Docs root. The containment check compares against this path as
            given, so it should already be resolved.

    Returns:
        The resolved path when it is an existing regular file located under
        *root* whose suffix is listed in ``IMAGE_EXTS``; otherwise ``None``.
    """
    if not ref:
        return None

    # Function-scope import: the module header only imports ``quote``.
    from urllib.parse import unquote

    spellings = [ref]
    decoded = unquote(ref)
    if decoded != ref:
        spellings.append(decoded)

    for spelling in spellings:
        if spelling.startswith("/"):
            candidate = root / spelling.lstrip("/")
        else:
            candidate = md_file.parent / spelling

        try:
            resolved = candidate.resolve()
            if not resolved.is_file():
                continue
        except (OSError, RuntimeError, ValueError):
            # Unresolvable path (symlink loop, over-long name, embedded NUL):
            # treat it like a missing file instead of aborting the scan.
            continue

        try:
            resolved.relative_to(root)
        except ValueError:
            # Reference escapes the docs root.
            continue

        if resolved.suffix.lower() not in IMAGE_EXTS:
            continue

        return resolved

    return None
|
|
|
|
|
|
def find_markdown_files(root: Path) -> list[Path]:
    """Return all ``*.md`` files below *root*, sorted.

    Anything inside a ``node_modules`` directory below *root* is skipped.
    Only the path relative to *root* is inspected, so a docs root that
    itself lives under a ``node_modules`` directory is still scanned.
    Directories whose name happens to end in ``.md`` are ignored because
    callers read every returned path as text.
    """
    found: list[Path] = []
    for path in root.rglob("*.md"):
        if "node_modules" in path.relative_to(root).parts:
            continue
        if not path.is_file():
            continue
        found.append(path)
    return sorted(found)
|
|
|
|
|
|
def collect_images(
    root: Path, md_files: Sequence[Path]
) -> tuple[set[Path], list[tuple[Path, str]]]:
    """Scan Markdown files for local image references.

    Both Markdown image syntax and HTML ``<img>`` tags are inspected.

    Returns:
        A pair ``(images, missing)``: the set of resolved image paths, and a
        list of ``(markdown file, reference)`` entries for local references
        that ``resolve_local_ref`` rejected.
    """
    found: set[Path] = set()
    unresolved: list[tuple[Path, str]] = []

    for doc in md_files:
        content = doc.read_text(encoding="utf-8")

        # Markdown references first, then HTML ones, so the order of the
        # unresolved entries per file is Markdown before HTML.
        refs = [parse_md_ref(hit.group(1)) for hit in MD_IMAGE_RE.finditer(content)]
        refs += [clean_ref(hit.group(2)) for hit in HTML_IMG_RE.finditer(content)]

        for ref in refs:
            if not (ref and is_local_ref(ref)):
                continue
            target = resolve_local_ref(doc, ref, root)
            if target:
                found.add(target)
            else:
                unresolved.append((doc, ref))

    return found, unresolved
|
|
|
|
|
|
def build_target(remote: str, bucket: str, prefix: str) -> str:
    """Compose the rclone destination ``remote:[bucket][/prefix]``.

    Leading and trailing slashes are stripped from *prefix*; an empty
    prefix adds nothing after the bucket.
    """
    destination = f"{remote}:{bucket}" if bucket else f"{remote}:"
    trimmed = prefix.strip("/")
    if not trimmed:
        return destination
    return f"{destination}/{trimmed}"
|
|
|
|
|
|
def rel_object_path(root: Path, image_path: Path, prefix: str) -> str:
    """Return the object key for *image_path*: ``[prefix/]<path relative to root>``.

    The relative path uses forward slashes. Leading and trailing slashes are
    stripped from *prefix*. ``Path.relative_to`` raises ``ValueError`` when
    *image_path* is not located under *root*.
    """
    relative = image_path.relative_to(root).as_posix()
    cleaned = prefix.strip("/")
    if not cleaned:
        return relative
    return "/".join((cleaned, relative))
|
|
|
|
|
|
def build_public_url(base: str, object_path: str) -> str:
    """Join *base* and the percent-encoded *object_path* with a single slash.

    Trailing slashes on *base* are dropped. In *object_path*, ``/``, ``-``,
    ``.``, ``_`` and ``~`` are left unescaped.
    """
    trimmed_base = base.rstrip("/")
    escaped = quote(object_path, safe="/-._~")
    return "/".join((trimmed_base, escaped))
|
|
|
|
|
|
def run_rclone_upload(
    root: Path, target: str, rel_files: Iterable[str], dry_run: bool
) -> None:
    """Copy the listed files from *root* to *target* with ``rclone copy``.

    The relative paths are written one per line to a temporary list file
    that is handed to rclone through ``--files-from``. The list file is
    removed afterwards, including when writing it or running rclone fails.

    Args:
        root: Local source directory passed to rclone.
        target: rclone destination, e.g. ``remote:bucket/prefix``.
        rel_files: Paths relative to *root*, one entry per file to copy.
        dry_run: When true, ``--dry-run`` is appended and the command line
            is printed instead of the "Uploading to" message.

    Raises:
        RuntimeError: If ``rclone`` is not found in ``PATH``.
        subprocess.CalledProcessError: If rclone exits with a non-zero status.
    """
    if shutil.which("rclone") is None:
        raise RuntimeError("rclone not found in PATH")

    tmp_path: Path | None = None
    try:
        # delete=False because rclone must reopen the file by name after it
        # has been closed; removal is handled by the finally clause below.
        with tempfile.NamedTemporaryFile(
            mode="w", encoding="utf-8", delete=False
        ) as tmp:
            tmp_path = Path(tmp.name)
            tmp.writelines(f"{rel}\n" for rel in rel_files)

        cmd = [
            "rclone",
            "copy",
            str(root),
            target,
            "--files-from",
            str(tmp_path),
            "--create-empty-src-dirs",
        ]
        if dry_run:
            cmd.append("--dry-run")

        print()
        if dry_run:
            print("Dry-run:", " ".join(cmd))
        else:
            print(f"Uploading to: {target}")

        subprocess.run(cmd, check=True)
    finally:
        if tmp_path is not None:
            tmp_path.unlink(missing_ok=True)
|
|
|
|
|
|
def rewrite_markdown_files(
    root: Path,
    md_files: Sequence[Path],
    image_set: set[Path],
    prefix: str,
    public_base_url: str,
    backup_ext: str,
) -> int:
    """Replace local image links in Markdown files with public URLs.

    Only references that resolve to a path contained in *image_set* are
    rewritten. Markdown image links and HTML ``<img src=...>`` values are
    both handled. The replacement is spliced in at the exact position of the
    matched reference, so identical text elsewhere in the match (for example
    alt text equal to the file name) is left alone, and a Markdown title
    following the destination is preserved. A query string or fragment on
    the original reference is not carried over to the public URL.

    Args:
        root: Docs root used to resolve references and derive object paths.
        md_files: Markdown files to process.
        image_set: Resolved image paths that are eligible for rewriting.
        prefix: Destination prefix used when building the object path.
        public_base_url: Base URL that the object path is appended to.
        backup_ext: Text appended to the file name for the backup copy of a
            changed file; an empty string disables backups.

    Returns:
        The number of files whose content changed.
    """
    changed_count = 0

    def to_url(md_file: Path, raw_ref: str, is_markdown: bool) -> str | None:
        # Map a raw reference to its public URL, or None when it is remote,
        # unresolved, or not part of image_set.
        ref = parse_md_ref(raw_ref) if is_markdown else clean_ref(raw_ref)
        if not ref or not is_local_ref(ref):
            return None
        resolved = resolve_local_ref(md_file, ref, root)
        if not resolved or resolved not in image_set:
            return None
        obj = rel_object_path(root, resolved, prefix)
        return build_public_url(public_base_url, obj)

    def splice(match: re.Match[str], group: int, replacement: str) -> str:
        # Rebuild the matched text with exactly one capture group replaced.
        whole = match.group(0)
        offset = match.start(0)
        head = whole[: match.start(group) - offset]
        tail = whole[match.end(group) - offset :]
        return head + replacement + tail

    for md_file in md_files:
        text = md_file.read_text(encoding="utf-8")

        def md_repl(match: re.Match[str], md_file: Path = md_file) -> str:
            raw = match.group(1)
            url = to_url(md_file, raw, is_markdown=True)
            if not url:
                return match.group(0)

            # Find the destination token the same way parse_md_ref does, so
            # only the destination is swapped and any title stays in place.
            lead = len(raw) - len(raw.lstrip())
            body = raw[lead:]
            if body.startswith("<") and ">" in body:
                dest_len = body.find(">") + 1
            else:
                dest_len = len(re.split(r"\s+", body, maxsplit=1)[0])
            return splice(match, 1, raw[:lead] + url + body[dest_len:])

        def html_repl(match: re.Match[str], md_file: Path = md_file) -> str:
            url = to_url(md_file, match.group(2), is_markdown=False)
            if not url:
                return match.group(0)
            # Group 2 is the src value between the quotes; replacing by
            # position also works when the tag has spaces around "=".
            return splice(match, 2, url)

        updated = MD_IMAGE_RE.sub(md_repl, text)
        updated = HTML_IMG_RE.sub(html_repl, updated)

        if updated != text:
            if backup_ext:
                # with_name accepts an extension with or without a leading
                # dot; "a.md" + ".bak" still yields "a.md.bak".
                backup_path = md_file.with_name(md_file.name + backup_ext)
                backup_path.write_text(text, encoding="utf-8")
            md_file.write_text(updated, encoding="utf-8")
            changed_count += 1

    return changed_count
|
|
|
|
|
|
def main() -> int:
    """Run the command-line workflow: scan, report, upload, optionally rewrite.

    Steps: parse arguments, find Markdown files under the docs root, collect
    locally referenced images, print the list (and warnings for references
    that could not be resolved), upload through rclone unless ``--list-only``
    is set, and rewrite Markdown links when ``--rewrite-markdown`` is set
    and the run is not a dry run.

    Returns:
        0 on success (including when no images are found), 1 on invalid
        arguments, a missing docs root, or a failed upload.
    """
    args = parse_args()

    if args.rewrite_markdown and not args.public_base_url:
        print(
            "Error: --public-base-url is required when using --rewrite-markdown",
            file=sys.stderr,
        )
        return 1

    root = Path(args.docs_root).resolve()
    if not root.is_dir():
        print(f"Error: docs root not found: {args.docs_root}", file=sys.stderr)
        return 1

    # Scanning is done with pathlib and re only; no external search tool
    # (such as ripgrep) is needed, so none is required here.
    md_files = find_markdown_files(root)
    images, missing = collect_images(root, md_files)

    rel_files = sorted(p.relative_to(root).as_posix() for p in images)

    if rel_files:
        print(f"Found {len(rel_files)} image files:")
        for rel in rel_files:
            print(rel)

    # Report unresolved references before any early exit so they are
    # visible even when nothing could be resolved at all.
    if missing:
        print(file=sys.stderr)
        print(
            f"Warning: {len(missing)} referenced files were not found (showing up to 20):",
            file=sys.stderr,
        )
        for md, ref in missing[:20]:
            print(f"{md}\t{ref}", file=sys.stderr)

    if not images:
        print("No local image references found in Markdown docs.")
        return 0

    if args.list_only:
        return 0

    target = build_target(args.remote, args.bucket, args.prefix)
    try:
        run_rclone_upload(root, target, rel_files, dry_run=args.dry_run)
    except RuntimeError as exc:
        # Raised when rclone is not installed.
        print(f"Error: {exc}", file=sys.stderr)
        return 1
    except subprocess.CalledProcessError as exc:
        print(
            f"Error: rclone exited with status {exc.returncode}",
            file=sys.stderr,
        )
        return 1

    if args.rewrite_markdown and not args.dry_run:
        changed = rewrite_markdown_files(
            root=root,
            md_files=md_files,
            image_set=images,
            prefix=args.prefix,
            public_base_url=args.public_base_url,
            backup_ext=args.backup_ext,
        )
        print(f"Rewrote {changed} markdown files.")

    print("Done.")
    return 0
|
|
|
|
|
|
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|