setup/tools/extract-docs.py
X9 Dev f5a5de943a Implement Forgejo review changes: Atera, UCPD, PDF default, OA3, dedup
02-software: add Atera RMM agent install (Invoke-WebRequest + msiexec /qn),
stop UCPD driver before PDF association write, restart after; remove
reference to PDF-DefaultApp scheduled task in header

03-system-registry: correct OneDrive uninstall description - intentional
(pre-installed consumer version only, no policy key, M365 can reinstall)

04-default-profile: OneDrive RunOnce blocking removed, ShowRecent=0,
ShowFrequent=0, FullPath=1 in CabinetState already added in prior session

06-scheduled-tasks: PDF-DefaultApp task removed - PDF set once in step 02

08-activation: add OA3/BIOS embedded key check via SoftwareLicensingService
WMI; key priority: config.json > OA3 firmware > GVLK

web/spec: update all status badges, remove mustfix flags, deduplicate
OneDrive entries across steps 01/03/04, add OA3 row to step-08

web/data/descriptions.json: regenerated (65 items)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 09:29:32 +02:00

87 lines
2.6 KiB
Python

#!/usr/bin/env python3
"""
extract-docs.py - Extract documentation from PS script headers and generate web/data/descriptions.json
Usage: python3 tools/extract-docs.py
Run from repo root. Reads scripts/*.ps1, writes web/data/descriptions.json.
PS script header format:
<#
.SYNOPSIS
One line description
.DESCRIPTION
Multi-line description
.ITEMS
slug-of-spec-row: Description of what this item does
another-slug: Another description
#>
"""
import os, re, json
# Paths are resolved relative to this file (not the CWD), so the script works
# regardless of where it is invoked from; both point one level above tools/.
SCRIPTS_DIR = os.path.join(os.path.dirname(__file__), '..', 'scripts')
OUTPUT_FILE = os.path.join(os.path.dirname(__file__), '..', 'web', 'data', 'descriptions.json')
def parse_script(path):
    """Parse the PowerShell comment-based help header of one script.

    Reads the file at *path*, locates the first ``<# ... #>`` block and pulls
    out the .SYNOPSIS, .DESCRIPTION and .ITEMS sections. Returns a dict with
    keys 'synopsis' (single line), 'description' (multi-line) and 'items'
    (slug -> description mapping), or None when no header block exists.
    """
    with open(path, encoding='utf-8') as handle:
        text = handle.read()

    # Locate the <# ... #> comment block (non-greedy, spans newlines).
    header = re.search(r'<#(.*?)#>', text, re.DOTALL)
    if header is None:
        return None
    body = header.group(1)

    def section(tag):
        # Capture everything after ".TAG" up to the next ".UPPERCASE" keyword
        # (or the end of the block), then strip per-line leading indentation.
        found = re.search(
            r'\.' + tag + r'\s*\n(.*?)(?=\n\s*\.[A-Z]|\Z)', body, re.DOTALL)
        if found is None:
            return ''
        return re.sub(r'\n[ \t]+', '\n', found.group(1)).strip()

    # .ITEMS lines look like "slug: description"; anything else is skipped.
    items = {}
    for raw in section('ITEMS').splitlines():
        entry = raw.strip()
        if entry and ':' in entry:
            slug, _, desc = entry.partition(':')
            slug, desc = slug.strip(), desc.strip()
            if slug and desc:
                items[slug] = desc

    return {
        'synopsis': section('SYNOPSIS').replace('\n', ' ').strip(),
        'description': section('DESCRIPTION').strip(),
        'items': items,
    }
def main():
    """Scan SCRIPTS_DIR for *.ps1 scripts and regenerate OUTPUT_FILE.

    Prints one status line per script ("OK" with item count, or a marker when
    the doc header is missing), then writes the collected documentation as
    pretty-printed UTF-8 JSON and prints summary totals.
    """
    result = {}
    for fname in sorted(os.listdir(SCRIPTS_DIR)):
        if not fname.endswith('.ps1'):
            continue
        # splitext drops only the trailing extension; the previous
        # fname.replace('.ps1', '') removed EVERY occurrence, mangling
        # names such as 'foo.ps1.ps1' into 'foo'.
        script_id = os.path.splitext(fname)[0]
        path = os.path.join(SCRIPTS_DIR, fname)
        parsed = parse_script(path)
        if parsed is None:
            print(f'--- {fname}: no doc header found')
            continue
        result[script_id] = parsed
        item_count = len(parsed['items'])
        print(f'OK {fname}: {item_count} items')
    # ensure_ascii=False keeps non-ASCII description text readable in the JSON.
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
        json.dump(result, f, ensure_ascii=False, indent=2)
    print(f'\nWritten: {OUTPUT_FILE}')
    print(f'Scripts documented: {len(result)}')
    total_items = sum(len(v['items']) for v in result.values())
    print(f'Total item descriptions: {total_items}')


if __name__ == '__main__':
    main()