assistant-skills/pov-doc/scripts/render_poc.py

393 lines
13 KiB
Python

#!/usr/bin/env python3
"""
Render a Verkada POC Criteria markdown document as a formatted .odt file.
This script takes a POC Criteria markdown file (generated by the pov-doc skill),
replaces logo placeholders and mission statement with actual content, and converts
the result to a professionally formatted .odt using pandoc with a reference document.
Usage:
python render_poc.py --input <markdown_file> --customer-logo <logo.png> \
[--verkada-logo <verkada.png>] [--mission <"mission statement">] \
[--reference-doc <reference.odt>] [--output <output.odt>]
If --verkada-logo is not provided, the bundled assets/verkada-logo.png is used when it exists; otherwise the Verkada logo placeholder is removed.
If --mission is not provided, the existing mission statement in the markdown is kept.
If --reference-doc is not provided, defaults to examples/POC Reference Template.odt in the skill directory, falling back to the included City of El Paso reference.
If --output is not provided, defaults to the input filename with .odt extension.
Dependencies:
- pandoc (must be installed and on PATH)
- Pillow (for logo resizing)
"""
import argparse
import os
import re
import subprocess
import sys
import tempfile
import zipfile
from pathlib import Path
def find_skill_dir() -> Path:
    """Return the directory two levels above this script file (resolved to an absolute path)."""
    script_dir = Path(__file__).resolve().parent
    return script_dir.parent
def find_reference_doc() -> Path | None:
    """
    Find the default reference ODT template in the skill's examples directory.

    Returns:
        The path to "POC Reference Template.odt" when it exists; otherwise the
        path to "City of El Paso POC Criteria.odt" (after printing a warning to
        stderr) when that exists; otherwise None.
    """
    examples_dir = find_skill_dir() / "examples"

    ref = examples_dir / "POC Reference Template.odt"
    if ref.exists():
        return ref

    # Fallback: look for the El Paso doc and warn
    alt = examples_dir / "City of El Paso POC Criteria.odt"
    if alt.exists():
        print(
            "Warning: using City of El Paso reference (may carry over logos). "
            "Run create_reference_template() to generate a clean template.",
            file=sys.stderr,
        )
        return alt

    # Neither template is present; the caller decides how to report this.
    return None
def find_verkada_logo() -> Path | None:
    """
    Find the bundled Verkada logo in the skill's assets directory.

    Returns:
        The path to assets/verkada-logo.png under the skill directory when the
        file exists, otherwise None.
    """
    logo = find_skill_dir() / "assets" / "verkada-logo.png"
    return logo if logo.exists() else None
def resize_logo_if_needed(
    input_path: str, max_width_inches: float = 2.75, dpi: int = 150
) -> str:
    """
    Resize a logo image if it exceeds the target width for print.

    Args:
        input_path: Path to the source image.
        max_width_inches: Maximum width of the logo, in inches.
        dpi: Resolution used to convert ``max_width_inches`` to pixels.

    Returns:
        ``input_path`` unchanged when the image already fits or Pillow is not
        installed; otherwise the path to a newly created temporary PNG holding
        the downscaled image. The caller is responsible for deleting that file.
    """
    # Pillow is optional: without it the logo is used at its original size.
    try:
        from PIL import Image
    except ImportError:
        print("Warning: Pillow not installed, skipping logo resize", file=sys.stderr)
        return input_path

    max_width_px = int(max_width_inches * dpi)

    # The context manager releases the underlying file handle once we are done.
    with Image.open(input_path) as img:
        width_px, height_px = img.size
        if width_px <= max_width_px:
            return input_path
        ratio = max_width_px / width_px
        # Clamp to 1px so extremely wide, short logos do not truncate to a
        # zero height, which the resize call cannot handle.
        new_size = (max_width_px, max(1, int(height_px * ratio)))
        img_resized = img.resize(new_size, Image.LANCZOS)

    # Write resized version to a temp file
    fd, out_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)
    try:
        img_resized.save(out_path, "PNG")
    except Exception:
        # Do not leave an empty/partial temp file behind when saving fails.
        os.unlink(out_path)
        raise
    return out_path
def process_markdown(
md_content: str,
customer_logo_path: str,
verkada_logo_path: str | None,
mission_statement: str | None,
) -> str:
"""
Process the markdown content:
1. Replace logo HTML comments with markdown image syntax
2. Optionally update the mission statement
3. Remove the Table of Contents section (pandoc can generate one)
"""
# Replace customer logo placeholder (match any logo comment that is NOT Verkada)
md_content = re.sub(
r"<!--\s*Image goes here:(?!.*?Verkada).*?logo.*?-->\s*\n?",
f"![Customer Logo]({customer_logo_path})\n\n",
md_content,
flags=re.IGNORECASE,
)
# Replace Verkada logo placeholder
if verkada_logo_path:
md_content = re.sub(
r"<!--\s*Image goes here:.*?Verkada.*?logo.*?-->\s*\n?",
f"![Verkada Logo]({verkada_logo_path})\n",
md_content,
flags=re.IGNORECASE,
)
else:
# Remove the Verkada logo placeholder entirely
md_content = re.sub(
r"<!--\s*Image goes here:.*?Verkada.*?logo.*?-->\s*\n?",
"",
md_content,
flags=re.IGNORECASE,
)
# Update mission statement if provided
if mission_statement:
# Match the existing italicized mission statement
md_content = re.sub(
r'\*"[^"]*"\*\s*\n',
f'*"{mission_statement}"*\n\n',
md_content,
)
# Add hard line breaks between consecutive TOC link entries so pandoc
# renders each entry on its own line instead of collapsing them into one paragraph
md_content = re.sub(
r'(\]\([^)]*\))\n(\[)',
r'\1\\\n\2',
md_content,
)
# Insert page break before Table of Contents
md_content = re.sub(
r'(#{1,3} \*\*Table of Contents\*\*)',
r'\\newpage\n\n\1',
md_content,
)
return md_content
def convert_with_pandoc(md_path: str, output_path: str, reference_doc: str) -> bool:
    """
    Run pandoc to turn a markdown file into an .odt styled by a reference document.

    Returns True when pandoc exits with status 0. Returns False (after printing
    a message to stderr) when pandoc exits non-zero, is not found, or exceeds
    the 30 second timeout.
    """
    pandoc_args = ["pandoc", md_path, "-o", output_path]
    pandoc_args += ["--from", "markdown", "--to", "odt"]
    pandoc_args += ["--reference-doc", reference_doc]

    try:
        proc = subprocess.run(pandoc_args, capture_output=True, text=True, timeout=30)
    except FileNotFoundError:
        # The pandoc executable could not be launched at all.
        print(
            "Error: pandoc not found. Install it with your package manager.",
            file=sys.stderr,
        )
        return False
    except subprocess.TimeoutExpired:
        print("Error: pandoc timed out", file=sys.stderr)
        return False

    if proc.returncode == 0:
        return True

    print(f"pandoc error: {proc.stderr}", file=sys.stderr)
    return False
def _remove_elements_from_xml(
data: bytes, xpath_to_remove: list[tuple[str, dict]]
) -> bytes:
"""Remove elements matching the given xpath patterns from ODT XML data."""
import xml.etree.ElementTree as ET
tree = ET.fromstring(data)
changed = False
for xpath, ns in xpath_to_remove:
for el in tree.findall(xpath, ns):
for parent in tree.iter():
if el in list(parent):
parent.remove(el)
changed = True
break
if changed:
return ET.tostring(tree, encoding="unicode", xml_declaration=True).encode(
"utf-8"
)
return data
def _odt_ns() -> dict:
return {
"text": "urn:oasis:names:tc:opendocument:xmlns:text:1.0",
"office": "urn:oasis:names:tc:opendocument:xmlns:office:1.0",
"style": "urn:oasis:names:tc:opendocument:xmlns:style:1.0",
"draw": "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0",
}
def remove_heading_bookmarks(odt_path: str) -> None:
    """
    Remove bookmark, bookmark-start and bookmark-end elements from the ODT content.xml.

    The archive is rewritten to ``<odt_path>.tmp`` and then moved over the
    original; every entry other than content.xml is copied through unchanged.

    Args:
        odt_path: Path to the .odt file to modify in place.
    """
    ns = _odt_ns()
    bookmarks_xpath = [
        (".//text:bookmark", ns),
        (".//text:bookmark-start", ns),
        (".//text:bookmark-end", ns),
    ]
    tmp_path = odt_path + ".tmp"
    try:
        with zipfile.ZipFile(odt_path, "r") as zin:
            with zipfile.ZipFile(tmp_path, "w", zipfile.ZIP_DEFLATED) as zout:
                for item in zin.infolist():
                    data = zin.read(item.filename)
                    if item.filename == "content.xml":
                        data = _remove_elements_from_xml(data, bookmarks_xpath)
                    # Passing the original ZipInfo keeps each entry's metadata
                    # (including its own compression type).
                    zout.writestr(item, data)
        os.replace(tmp_path, odt_path)
    finally:
        # After a successful replace the temp file no longer exists; on any
        # failure this removes the partial archive instead of leaving it behind.
        if os.path.exists(tmp_path):
            os.unlink(tmp_path)
def fix_image_frames(odt_path: str) -> None:
    """
    Set all draw:frame elements in content.xml to inline (as-char) so text doesn't wrap around images.

    Each frame gets ``text:anchor-type="as-char"`` and loses its ``svg:x`` /
    ``svg:y`` attributes. The archive is rewritten to ``<odt_path>.tmp`` and
    then moved over the original; other entries are copied through unchanged.

    Args:
        odt_path: Path to the .odt file to modify in place.
    """
    import xml.etree.ElementTree as ET

    draw_ns = "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"
    text_ns = "urn:oasis:names:tc:opendocument:xmlns:text:1.0"
    svg_ns = "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0"

    # Clark-notation names ("{uri}local") as ElementTree expects them.
    frame_tag = f"{{{draw_ns}}}frame"
    anchor_attr = f"{{{text_ns}}}anchor-type"
    position_attrs = (f"{{{svg_ns}}}x", f"{{{svg_ns}}}y")

    tmp_path = odt_path + ".tmp"
    try:
        with zipfile.ZipFile(odt_path, "r") as zin:
            with zipfile.ZipFile(tmp_path, "w", zipfile.ZIP_DEFLATED) as zout:
                for item in zin.infolist():
                    data = zin.read(item.filename)
                    if item.filename == "content.xml":
                        tree = ET.fromstring(data)
                        for frame in tree.iter(frame_tag):
                            frame.set(anchor_attr, "as-char")
                            # Remove absolute positioning attrs that only apply to floating frames
                            for attr in position_attrs:
                                frame.attrib.pop(attr, None)
                        # Serialize directly to UTF-8 bytes so the XML
                        # declaration always names the encoding actually used,
                        # independent of the process locale.
                        data = ET.tostring(tree, encoding="utf-8", xml_declaration=True)
                    zout.writestr(item, data)
        os.replace(tmp_path, odt_path)
    finally:
        # Nothing to do after a successful replace; on failure this removes the
        # partially written archive.
        if os.path.exists(tmp_path):
            os.unlink(tmp_path)
def remove_header_images(odt_path: str) -> None:
    """
    Remove draw:frame elements from the headers and footers in styles.xml to strip reference doc logos.

    The archive is rewritten to ``<odt_path>.tmp`` and then moved over the
    original; every entry other than styles.xml is copied through unchanged.

    Args:
        odt_path: Path to the .odt file to modify in place.
    """
    ns = _odt_ns()
    frames_xpath = [
        (".//style:header//draw:frame", ns),
        (".//style:footer//draw:frame", ns),
    ]
    tmp_path = odt_path + ".tmp"
    try:
        with zipfile.ZipFile(odt_path, "r") as zin:
            with zipfile.ZipFile(tmp_path, "w", zipfile.ZIP_DEFLATED) as zout:
                for item in zin.infolist():
                    data = zin.read(item.filename)
                    if item.filename == "styles.xml":
                        data = _remove_elements_from_xml(data, frames_xpath)
                    zout.writestr(item, data)
        os.replace(tmp_path, odt_path)
    finally:
        # After a successful replace the temp file is gone; on failure remove
        # the partial archive so it does not linger next to the output.
        if os.path.exists(tmp_path):
            os.unlink(tmp_path)
def main():
    """
    Command-line entry point: render a POC Criteria markdown file to .odt.

    Parses arguments, validates the input file, customer logo and reference
    document, resizes the logos, rewrites the markdown placeholders, runs
    pandoc, then post-processes the resulting .odt (bookmarks, header/footer
    images, image anchoring). Exits with status 1 when validation or the
    pandoc conversion fails. Temporary files created along the way are
    removed before returning.
    """
    parser = argparse.ArgumentParser(
        description="Render a Verkada POC Criteria markdown document as a formatted .odt file."
    )
    parser.add_argument(
        "--input", "-i", required=True, help="Path to the POC Criteria markdown file"
    )
    parser.add_argument(
        "--customer-logo", required=True, help="Path to the customer logo image"
    )
    parser.add_argument(
        "--verkada-logo", help="Path to the Verkada logo image (optional)"
    )
    parser.add_argument(
        "--mission", help="Customer mission statement (replaces existing)"
    )
    parser.add_argument("--reference-doc", help="Path to reference .odt for formatting")
    parser.add_argument(
        "--output", "-o", help="Output .odt path (default: same as input with .odt)"
    )
    args = parser.parse_args()

    # Validate input file
    input_path = Path(args.input)
    if not input_path.exists():
        print(f"Error: input file not found: {input_path}", file=sys.stderr)
        sys.exit(1)

    # Validate customer logo
    if not os.path.isfile(args.customer_logo):
        print(f"Error: customer logo not found: {args.customer_logo}", file=sys.stderr)
        sys.exit(1)

    # Find reference doc
    reference_doc = args.reference_doc or find_reference_doc()
    if not reference_doc or not Path(reference_doc).exists():
        print(
            "Error: reference document not found. Provide --reference-doc or place the "
            "City of El Paso POC Criteria.odt in the skill's examples directory.",
            file=sys.stderr,
        )
        sys.exit(1)

    # Find Verkada logo
    verkada_logo = args.verkada_logo or find_verkada_logo()
    if verkada_logo and not Path(verkada_logo).exists():
        # Keep going without the logo, but tell the user why it is missing
        # from the output instead of dropping it silently.
        print(
            f"Warning: Verkada logo not found, skipping: {verkada_logo}",
            file=sys.stderr,
        )
        verkada_logo = None

    # Determine output path
    if args.output:
        output_path = args.output
    else:
        output_path = str(input_path.with_suffix(".odt"))

    # Every temp file is registered here as soon as it exists so the finally
    # block cleans up no matter which later step fails.
    temp_files: list[str] = []
    try:
        # Resize logos for print
        customer_logo_resized = resize_logo_if_needed(args.customer_logo)
        if customer_logo_resized != args.customer_logo:
            temp_files.append(customer_logo_resized)

        verkada_logo_resized = None
        if verkada_logo:
            verkada_logo_resized = resize_logo_if_needed(verkada_logo)
            if verkada_logo_resized != verkada_logo:
                temp_files.append(verkada_logo_resized)

        # Read and process markdown
        md_content = input_path.read_text(encoding="utf-8")
        processed = process_markdown(
            md_content,
            customer_logo_resized,
            verkada_logo_resized,
            args.mission,
        )

        # Write processed markdown to temp file (so image paths work)
        fd, temp_md = tempfile.mkstemp(suffix=".md")
        os.close(fd)
        temp_files.append(temp_md)
        with open(temp_md, "w", encoding="utf-8") as f:
            f.write(processed)

        # Convert with pandoc
        print(f"Converting {input_path.name} to {output_path}...")
        print(f" Reference doc: {reference_doc}")
        if verkada_logo:
            print(f" Verkada logo: {verkada_logo}")

        if not convert_with_pandoc(temp_md, output_path, str(reference_doc)):
            sys.exit(1)

        # Post-process the generated document in place.
        remove_heading_bookmarks(output_path)
        remove_header_images(output_path)
        fix_image_frames(output_path)
        out_size = os.path.getsize(output_path)
        print(f"Done: {output_path} ({out_size:,} bytes)")
    finally:
        # Clean up temp files
        for temp_path in temp_files:
            if os.path.exists(temp_path):
                os.unlink(temp_path)
if __name__ == "__main__":
main()