diff --git a/log-work-expense/SKILL.md b/log-work-expense/SKILL.md index 17f0653..9644d1c 100644 --- a/log-work-expense/SKILL.md +++ b/log-work-expense/SKILL.md @@ -16,23 +16,15 @@ Log receipt images and expense details into the `wip.work_expenses` collection i ## Steps -1. **Extract receipt info** — If the user sent an image, run the OCR script to read the merchant and amount: +1. **Upload the receipt image to S2** — If the user provided an image, upload it to S2 and get the public URL. If they only provided a file or no image, skip this step. -```bash -uv run --with requests python3 scripts/ocr_receipt.py /path/to/image.jpg -``` - -The script is located at `~/notes/skills/log-work-expense/scripts/ocr_receipt.py`. It returns JSON with `merchant` and `amount` fields. If they only provided text, use that. - -2. **Upload the receipt image to S2** — If the user provided an image, upload it to S2 and get the public URL. If they only provided a file or no image, skip this step. - -3. **Insert into MongoDB** — Insert a document into `wip.work_expenses` with this structure: +2. **Insert into MongoDB** — Insert a document into `wip.work_expenses` with this structure: ```json { "file": "https://s2.connorrhodes.com/agent/{sha256}.{ext}", "date": "YYYY-MM-DD", - "merchant": "Name of the merchant from the receipt (e.g. Starbucks, 7 Eleven)", + "merchant": "Name of the merchant", "description": "Context the user provided about the expense", "status": "todo" } @@ -40,14 +32,13 @@ The script is located at `~/notes/skills/log-work-expense/scripts/ocr_receipt.py - **file**: The S2 URL of the uploaded receipt image. Omit if no image was provided. - **date**: Always use today's date (the date you receive the message), formatted as YYYY-MM-DD. -- **merchant**: Extracted from the OCR result. +- **merchant**: Ask the user if not stated. - **description**: What the user told you about the expense (e.g. "travel meal during LTISD onsite"). If they didn't provide context, ask. - **status**: Always set to `"todo"` when inserting. -4. **Confirm** — Reply with a brief confirmation showing what was logged (date, merchant, and file link if applicable). +3. **Confirm** — Reply with a brief confirmation showing what was logged (date, merchant, and file link if applicable). ## Notes - Use `uv run --with pymongo` for MongoDB scripts. - Use the S2 upload endpoint from TOOLS.md for file uploads. -- If the user sends a receipt without context, log what you can read from the image and note that context is pending. diff --git a/log-work-expense/scripts/ocr_receipt.py b/log-work-expense/scripts/ocr_receipt.py deleted file mode 100644 index 5dc8efa..0000000 --- a/log-work-expense/scripts/ocr_receipt.py +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env python3 -"""OCR a receipt image via OpenRouter and return structured info.""" -import requests, json, sys, base64 - -API_KEY = "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" -MODEL = "google/gemini-2.5-flash-lite" -IMAGE_PATH = sys.argv[1] - -with open(IMAGE_PATH, "rb") as f: - b64 = base64.b64encode(f.read()).decode() - -resp = requests.post( - "https://openrouter.ai/api/v1/chat/completions", - headers={ - "Authorization": f"Bearer {API_KEY}", - "Content-Type": "application/json", - }, - json={ - "model": MODEL, - "messages": [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": ( - "Extract from this receipt: merchant/restaurant name and total amount. " - "Return ONLY a JSON object with keys: merchant, amount. " - "No other text." - ), - }, - { - "type": "image_url", - "image_url": {"url": f"data:image/jpeg;base64,{b64}"}, - }, - ], - } - ], - "max_tokens": 200, - }, -) - -data = resp.json() -if "error" in data: - print(json.dumps({"error": data["error"]}), file=sys.stderr) - sys.exit(1) - -text = data["choices"][0]["message"]["content"].strip() -# Strip markdown code fences if present -if text.startswith("```"): - text = text.split("\n", 1)[-1] - if text.endswith("```"): - text = text[:-3] - text = text.strip() - -try: - result = json.loads(text) - print(json.dumps(result)) -except json.JSONDecodeError: - print(json.dumps({"raw": text}))