This commit is contained in:
Connor Rhodes 2026-04-10 09:15:34 -05:00
parent bbe3094899
commit 920647f67f
3 changed files with 37 additions and 3 deletions

View file

@ -141,6 +141,13 @@ def process_markdown(
md_content,
)
# Insert page break before Table of Contents
md_content = re.sub(
r'(#{1,3} \*\*Table of Contents\*\*)',
r'\\newpage\n\n\1',
md_content,
)
return md_content
@ -228,6 +235,32 @@ def remove_heading_bookmarks(odt_path: str) -> None:
os.replace(tmp_path, odt_path)
def fix_image_frames(odt_path: str) -> None:
    """Anchor every draw:frame in content.xml as a character (inline).

    Rewrites the ODT archive at *odt_path* in place: each ``draw:frame``
    element in ``content.xml`` gets ``text:anchor-type="as-char"`` so text
    doesn't wrap around images, and its ``svg:x`` / ``svg:y`` attributes are
    removed. All other archive members are copied through unchanged, keeping
    their original order and per-member compression settings.

    The new archive is built at ``odt_path + ".tmp"`` and swapped in with
    ``os.replace`` only after it has been written completely; on any failure
    the temporary file is deleted and the original archive is left untouched.

    Args:
        odt_path: Path to the ``.odt`` file to modify.

    Raises:
        OSError: If the archive cannot be read or the replacement written.
        zipfile.BadZipFile: If *odt_path* is not a valid zip archive.
        xml.etree.ElementTree.ParseError: If ``content.xml`` is malformed.
    """
    import io
    import xml.etree.ElementTree as ET

    draw_ns = "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"
    text_ns = "urn:oasis:names:tc:opendocument:xmlns:text:1.0"
    svg_ns = "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0"

    frame_tag = f"{{{draw_ns}}}frame"
    anchor_attr = f"{{{text_ns}}}anchor-type"
    # Absolute positioning attrs that only apply to floating frames.
    position_attrs = (f"{{{svg_ns}}}x", f"{{{svg_ns}}}y")

    tmp_path = odt_path + ".tmp"
    try:
        with zipfile.ZipFile(odt_path, "r") as zin:
            with zipfile.ZipFile(tmp_path, "w", zipfile.ZIP_DEFLATED) as zout:
                for item in zin.infolist():
                    data = zin.read(item.filename)
                    if item.filename == "content.xml":
                        # Re-use the document's own prefixes (draw:, text:,
                        # svg:, ...) when serializing; otherwise ElementTree
                        # renames them to ns0, ns1, ... on output.
                        # NOTE: register_namespace updates a process-global
                        # registry.
                        for _event, (prefix, uri) in ET.iterparse(
                            io.BytesIO(data), events=("start-ns",)
                        ):
                            if not prefix:
                                continue
                            try:
                                ET.register_namespace(prefix, uri)
                            except ValueError:
                                # Prefixes of the form "ns<digits>" are
                                # reserved by ElementTree; let it pick one.
                                pass

                        root = ET.fromstring(data)
                        for frame in root.iter(frame_tag):
                            frame.set(anchor_attr, "as-char")
                            for attr in position_attrs:
                                frame.attrib.pop(attr, None)

                        # Serialize straight to UTF-8 bytes. With
                        # encoding="unicode" the XML declaration would name
                        # the locale's preferred encoding (e.g. cp1252 on
                        # Windows), which would not match UTF-8 bytes.
                        data = ET.tostring(
                            root, encoding="UTF-8", xml_declaration=True
                        )
                    # Passing the ZipInfo keeps each member's original
                    # metadata and compression type.
                    zout.writestr(item, data)
        os.replace(tmp_path, odt_path)
    except BaseException:
        # Don't leave a half-written archive next to the original.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
def remove_header_images(odt_path: str) -> None:
"""Remove draw:frame elements from the header in styles.xml to strip reference doc logos."""
ns = _odt_ns()
@ -334,6 +367,7 @@ def main():
if success:
remove_heading_bookmarks(output_path)
remove_header_images(output_path)
fix_image_frames(output_path)
out_size = os.path.getsize(output_path)
print(f"Done: {output_path} ({out_size:,} bytes)")
else: