assistant-skills/pov-doc/scripts/render_poc.py

#!/usr/bin/env python3
"""
Render a Verkada POC Criteria markdown document as a formatted .odt file.

This script takes a POC Criteria markdown file (generated by the pov-doc skill),
replaces logo placeholders and mission statement with actual content, and converts
the result to a professionally formatted .odt using pandoc with a reference document.

Usage:
    python render_poc.py --input <markdown_file> --customer-logo <logo.png> \
        [--verkada-logo <verkada.png>] [--mission <"mission statement">] \
        [--reference-doc <reference.odt>] [--output <output.odt>]

If --verkada-logo is not provided, the bundled assets/verkada-logo.png is used; if that
file is missing too, the Verkada logo placeholder is removed.
If --mission is not provided, the existing mission statement in the markdown is kept.
If --reference-doc is not provided, defaults to the bundled "POC Reference Template.odt",
falling back (with a warning) to the included City of El Paso reference.
If --output is not provided, defaults to the input filename with .odt extension.

Dependencies:
    - pandoc (must be installed and on PATH)
    - Pillow (for logo resizing)
"""

import argparse
import os
import re
import subprocess
import sys
import tempfile
import zipfile
from pathlib import Path


def find_skill_dir() -> Path:
    """Return the skill root: the parent of the directory that holds this script."""
    # Resolve first so symlinks and relative invocations give a stable absolute path.
    script_dir = Path(__file__).resolve().parent
    return script_dir.parent


def find_reference_doc() -> Path | None:
    """
    Find the default reference ODT template in the skill's examples directory.

    Lookup order:
    1. "POC Reference Template.odt" (returned silently).
    2. "City of El Paso POC Criteria.odt" (returned after printing a warning
       to stderr).

    Returns:
        The path of the first candidate that exists, or None when neither does.
    """
    examples_dir = find_skill_dir() / "examples"

    ref = examples_dir / "POC Reference Template.odt"
    if ref.exists():
        return ref

    # Fallback: look for the El Paso doc and warn
    alt = examples_dir / "City of El Paso POC Criteria.odt"
    if alt.exists():
        print(
            "Warning: using City of El Paso reference (may carry over logos). "
            "Run create_reference_template() to generate a clean template.",
            file=sys.stderr,
        )
        return alt

    return None


def find_verkada_logo() -> Path | None:
    """
    Find the bundled Verkada logo in the skill's assets directory.

    Returns:
        The path to assets/verkada-logo.png under the skill directory, or None
        when that file does not exist.
    """
    logo = find_skill_dir() / "assets" / "verkada-logo.png"
    return logo if logo.exists() else None


def resize_logo_if_needed(
    input_path: str, max_width_inches: float = 2.75, dpi: int = 150
) -> str:
    """
    Resize a logo image if it exceeds the target width for print.

    Args:
        input_path: Path of the source image.
        max_width_inches: Maximum printed width, in inches.
        dpi: Resolution used to convert the width limit into pixels.

    Returns:
        ``input_path`` itself when the image already fits or Pillow is not
        installed; otherwise the path of a new temporary PNG holding the
        downscaled image. The caller is responsible for deleting that file.
    """
    # Pillow is optional: without it the logo is passed through untouched.
    try:
        from PIL import Image
    except ImportError:
        print("Warning: Pillow not installed, skipping logo resize", file=sys.stderr)
        return input_path

    max_width_px = int(max_width_inches * dpi)

    # The context manager closes the underlying file handle once we are done.
    with Image.open(input_path) as img:
        width_px, height_px = img.size
        if width_px <= max_width_px:
            return input_path

        ratio = max_width_px / width_px
        # Clamp to 1px so an extremely wide, flat image never gets a zero height.
        new_size = (max_width_px, max(1, int(height_px * ratio)))
        img_resized = img.resize(new_size, Image.LANCZOS)

    # Write resized version to a temp file
    fd, out_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)
    try:
        img_resized.save(out_path, "PNG")
    except BaseException:
        # Do not leave an empty/partial temp file behind when saving fails.
        os.unlink(out_path)
        raise
    return out_path


def process_markdown(
    md_content: str,
    customer_logo_path: str,
    verkada_logo_path: str | None,
    mission_statement: str | None,
) -> str:
    """
    Process the markdown content:
    1. Replace logo HTML comments with markdown image syntax
    2. Optionally update the mission statement
    3. Remove the Table of Contents section (pandoc can generate one)
    """

    # Replace customer logo placeholder (match any logo comment that is NOT Verkada)
    md_content = re.sub(
        r"<!--\s*Image goes here:(?!.*?Verkada).*?logo.*?-->\s*\n?",
        f"![Customer Logo]({customer_logo_path})\n\n",
        md_content,
        flags=re.IGNORECASE,
    )

    # Replace Verkada logo placeholder
    if verkada_logo_path:
        md_content = re.sub(
            r"<!--\s*Image goes here:.*?Verkada.*?logo.*?-->\s*\n?",
            f"![Verkada Logo]({verkada_logo_path})\n",
            md_content,
            flags=re.IGNORECASE,
        )
    else:
        # Remove the Verkada logo placeholder entirely
        md_content = re.sub(
            r"<!--\s*Image goes here:.*?Verkada.*?logo.*?-->\s*\n?",
            "",
            md_content,
            flags=re.IGNORECASE,
        )

    # Update mission statement if provided
    if mission_statement:
        # Match the existing italicized mission statement
        md_content = re.sub(
            r'\*"[^"]*"\*\s*\n',
            f'*"{mission_statement}"*\n\n',
            md_content,
        )

    # Add hard line breaks between consecutive TOC link entries so pandoc
    # renders each entry on its own line instead of collapsing them into one paragraph
    md_content = re.sub(
        r'(\]\([^)]*\))\n(\[)',
        r'\1\\\n\2',
        md_content,
    )

    # Insert page break before Table of Contents
    md_content = re.sub(
        r'(#{1,3} \*\*Table of Contents\*\*)',
        r'\\newpage\n\n\1',
        md_content,
    )

    return md_content


def convert_with_pandoc(md_path: str, output_path: str, reference_doc: str) -> bool:
    """
    Run pandoc to turn a markdown file into an .odt styled by a reference document.

    Returns True when pandoc exits with status 0. Returns False, after printing
    a message to stderr, when pandoc is missing, exceeds the 30 second timeout,
    or exits with a non-zero status.
    """
    pandoc_args = ["pandoc", md_path, "-o", output_path]
    pandoc_args += ["--from", "markdown", "--to", "odt"]
    pandoc_args += ["--reference-doc", reference_doc]

    try:
        completed = subprocess.run(
            pandoc_args, capture_output=True, text=True, timeout=30
        )
    except FileNotFoundError:
        # The pandoc executable could not be launched at all.
        print(
            "Error: pandoc not found. Install it with your package manager.",
            file=sys.stderr,
        )
        return False
    except subprocess.TimeoutExpired:
        print("Error: pandoc timed out", file=sys.stderr)
        return False

    if completed.returncode == 0:
        return True

    print(f"pandoc error: {completed.stderr}", file=sys.stderr)
    return False


def _remove_elements_from_xml(
    data: bytes, xpath_to_remove: list[tuple[str, dict]]
) -> bytes:
    """Remove elements matching the given xpath patterns from ODT XML data."""
    import xml.etree.ElementTree as ET

    tree = ET.fromstring(data)
    changed = False
    for xpath, ns in xpath_to_remove:
        for el in tree.findall(xpath, ns):
            for parent in tree.iter():
                if el in list(parent):
                    parent.remove(el)
                    changed = True
                    break
    if changed:
        return ET.tostring(tree, encoding="unicode", xml_declaration=True).encode(
            "utf-8"
        )
    return data


def _odt_ns() -> dict:
    return {
        "text": "urn:oasis:names:tc:opendocument:xmlns:text:1.0",
        "office": "urn:oasis:names:tc:opendocument:xmlns:office:1.0",
        "style": "urn:oasis:names:tc:opendocument:xmlns:style:1.0",
        "draw": "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0",
    }


def remove_heading_bookmarks(odt_path: str) -> None:
    """
    Remove bookmark, bookmark-start and bookmark-end elements from the ODT content.xml.

    The archive is rewritten to ``<odt_path>.tmp`` and then moved over the
    original. Every other member is copied through unchanged. If rewriting
    fails, the partial ``.tmp`` file is deleted and the original document is
    left as it was.
    """
    ns = _odt_ns()
    bookmarks_xpath = [
        (".//text:bookmark", ns),
        (".//text:bookmark-start", ns),
        (".//text:bookmark-end", ns),
    ]

    tmp_path = odt_path + ".tmp"
    with zipfile.ZipFile(odt_path, "r") as zin:
        try:
            with zipfile.ZipFile(tmp_path, "w", zipfile.ZIP_DEFLATED) as zout:
                for item in zin.infolist():
                    data = zin.read(item.filename)
                    if item.filename == "content.xml":
                        data = _remove_elements_from_xml(data, bookmarks_xpath)
                    zout.writestr(item, data)
        except BaseException:
            # Don't leave a half-written archive next to the real document.
            if os.path.exists(tmp_path):
                os.unlink(tmp_path)
            raise

    # Swap only after both archives are closed.
    os.replace(tmp_path, odt_path)


def fix_image_frames(odt_path: str) -> None:
    """
    Set all draw:frame elements in content.xml to inline (as-char) so text doesn't wrap around images.

    For every ``draw:frame`` the ``text:anchor-type`` attribute is set to
    ``as-char`` and the ``svg:x`` / ``svg:y`` attributes are dropped.
    content.xml is always re-serialized; every other archive member is copied
    through unchanged. The archive is rewritten to ``<odt_path>.tmp`` and then
    moved over the original. If rewriting fails, the partial ``.tmp`` file is
    deleted and the original document is left as it was.
    """
    import xml.etree.ElementTree as ET

    draw_ns = "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"
    text_ns = "urn:oasis:names:tc:opendocument:xmlns:text:1.0"
    svg_ns = "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0"

    frame_tag = f"{{{draw_ns}}}frame"
    anchor_attr = f"{{{text_ns}}}anchor-type"
    # Absolute positioning attrs that only apply to floating frames
    position_attrs = (f"{{{svg_ns}}}x", f"{{{svg_ns}}}y")

    tmp_path = odt_path + ".tmp"
    with zipfile.ZipFile(odt_path, "r") as zin:
        try:
            with zipfile.ZipFile(tmp_path, "w", zipfile.ZIP_DEFLATED) as zout:
                for item in zin.infolist():
                    data = zin.read(item.filename)
                    if item.filename == "content.xml":
                        tree = ET.fromstring(data)
                        for frame in tree.iter(frame_tag):
                            frame.set(anchor_attr, "as-char")
                            for attr in position_attrs:
                                frame.attrib.pop(attr, None)
                        # Serialize straight to UTF-8 bytes so the declared
                        # encoding always matches the payload.
                        data = ET.tostring(
                            tree, encoding="utf-8", xml_declaration=True
                        )
                    zout.writestr(item, data)
        except BaseException:
            # Don't leave a half-written archive next to the real document.
            if os.path.exists(tmp_path):
                os.unlink(tmp_path)
            raise

    # Swap only after both archives are closed.
    os.replace(tmp_path, odt_path)


def remove_header_images(odt_path: str) -> None:
    """
    Remove draw:frame elements from the headers and footers in styles.xml to strip reference doc logos.

    The archive is rewritten to ``<odt_path>.tmp`` and then moved over the
    original. Every other member is copied through unchanged. If rewriting
    fails, the partial ``.tmp`` file is deleted and the original document is
    left as it was.
    """
    ns = _odt_ns()
    frames_xpath = [
        (".//style:header//draw:frame", ns),
        (".//style:footer//draw:frame", ns),
    ]

    tmp_path = odt_path + ".tmp"
    with zipfile.ZipFile(odt_path, "r") as zin:
        try:
            with zipfile.ZipFile(tmp_path, "w", zipfile.ZIP_DEFLATED) as zout:
                for item in zin.infolist():
                    data = zin.read(item.filename)
                    if item.filename == "styles.xml":
                        data = _remove_elements_from_xml(data, frames_xpath)
                    zout.writestr(item, data)
        except BaseException:
            # Don't leave a half-written archive next to the real document.
            if os.path.exists(tmp_path):
                os.unlink(tmp_path)
            raise

    # Swap only after both archives are closed.
    os.replace(tmp_path, odt_path)


def main():
    """
    Command-line entry point.

    Parses arguments, validates the input markdown, customer logo and
    reference document, resizes the logos, rewrites the markdown, converts it
    with pandoc and post-processes the resulting .odt. Exits with status 1 on
    a validation or conversion failure. Temporary files created along the way
    are removed on both success and failure.
    """
    parser = argparse.ArgumentParser(
        description="Render a Verkada POC Criteria markdown document as a formatted .odt file."
    )
    parser.add_argument(
        "--input", "-i", required=True, help="Path to the POC Criteria markdown file"
    )
    parser.add_argument(
        "--customer-logo", required=True, help="Path to the customer logo image"
    )
    parser.add_argument(
        "--verkada-logo", help="Path to the Verkada logo image (optional)"
    )
    parser.add_argument(
        "--mission", help="Customer mission statement (replaces existing)"
    )
    parser.add_argument("--reference-doc", help="Path to reference .odt for formatting")
    parser.add_argument(
        "--output", "-o", help="Output .odt path (default: same as input with .odt)"
    )

    args = parser.parse_args()

    # Validate input file
    input_path = Path(args.input)
    if not input_path.exists():
        print(f"Error: input file not found: {input_path}", file=sys.stderr)
        sys.exit(1)

    # Validate customer logo
    if not os.path.isfile(args.customer_logo):
        print(f"Error: customer logo not found: {args.customer_logo}", file=sys.stderr)
        sys.exit(1)

    # Find reference doc
    reference_doc = args.reference_doc or find_reference_doc()
    if not reference_doc or not Path(reference_doc).exists():
        print(
            "Error: reference document not found. Provide --reference-doc or place the "
            "City of El Paso POC Criteria.odt in the skill's examples directory.",
            file=sys.stderr,
        )
        sys.exit(1)

    # Find Verkada logo; a missing file is not fatal, but say so instead of
    # silently dropping the logo.
    verkada_logo = args.verkada_logo or find_verkada_logo()
    if verkada_logo and not Path(verkada_logo).exists():
        print(
            f"Warning: Verkada logo not found: {verkada_logo}; "
            "its placeholder will be removed.",
            file=sys.stderr,
        )
        verkada_logo = None

    # Determine output path
    output_path = args.output or str(input_path.with_suffix(".odt"))

    # Every temp file is registered here as soon as it exists, so the cleanup
    # below runs no matter which later step fails.
    temp_files = []
    try:
        # Resize logos for print
        customer_logo_resized = resize_logo_if_needed(args.customer_logo)
        if customer_logo_resized != args.customer_logo:
            temp_files.append(customer_logo_resized)

        verkada_logo_resized = None
        if verkada_logo:
            verkada_logo_resized = resize_logo_if_needed(verkada_logo)
            if verkada_logo_resized != verkada_logo:
                temp_files.append(verkada_logo_resized)

        # Read and process markdown
        md_content = input_path.read_text(encoding="utf-8")
        processed = process_markdown(
            md_content,
            customer_logo_resized,
            verkada_logo_resized,
            args.mission,
        )

        # Write processed markdown to temp file (so image paths work)
        fd, temp_md = tempfile.mkstemp(suffix=".md")
        os.close(fd)
        temp_files.append(temp_md)
        with open(temp_md, "w", encoding="utf-8") as f:
            f.write(processed)

        # Convert with pandoc
        print(f"Converting {input_path.name} to {output_path}...")
        print(f"  Reference doc: {reference_doc}")
        if verkada_logo:
            print(f"  Verkada logo: {verkada_logo}")

        if not convert_with_pandoc(temp_md, output_path, reference_doc):
            sys.exit(1)

        remove_heading_bookmarks(output_path)
        remove_header_images(output_path)
        fix_image_frames(output_path)
        out_size = os.path.getsize(output_path)
        print(f"Done: {output_path} ({out_size:,} bytes)")
    finally:
        # Clean up temp files; one failed delete must not block the others.
        for temp_file in temp_files:
            try:
                os.unlink(temp_file)
            except OSError as exc:
                print(
                    f"Warning: could not remove temp file {temp_file}: {exc}",
                    file=sys.stderr,
                )


# Run the command-line entry point only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()