add OCR script, use direct API call instead of built-in image tool

This commit is contained in:
Connor Rhodes 2026-04-29 01:50:55 +00:00
parent 5729338167
commit 05a1d6ca13
2 changed files with 67 additions and 1 deletions

View file

@ -0,0 +1,60 @@
#!/usr/bin/env python3
"""OCR a receipt image via OpenRouter and return structured info.

Usage:
    OPENROUTER_API_KEY=<key> python3 <this script> IMAGE_PATH

On success one JSON document is printed to stdout: whatever JSON the model
returned (the prompt asks for the keys ``date``, ``vendor`` and ``amount``),
or ``{"raw": <reply text>}`` when the reply is not valid JSON.

On failure ``{"error": ...}`` is printed to stderr and the exit status is 1.
"""
import base64
import json
import mimetypes
import os
import sys

API_URL = "https://openrouter.ai/api/v1/chat/completions"
MODEL = "google/gemini-2.5-flash-lite"

# SECURITY: the API key is read from the environment and must never be
# committed. A key that was previously hard-coded in this file is exposed in
# the repository history and has to be revoked/rotated.
API_KEY_ENV = "OPENROUTER_API_KEY"

# Without a timeout `requests` waits forever on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 60

# MIME type used when the file extension does not identify an image format.
DEFAULT_IMAGE_MIME = "image/jpeg"

PROMPT = (
    "Extract from this receipt: date, vendor/restaurant name, and total amount. "
    "Return ONLY a JSON object with keys: date (YYYY-MM-DD format), vendor, amount. "
    "No other text."
)


class ReceiptOcrError(Exception):
    """A failure that should be reported to the user as ``{"error": detail}``.

    ``detail`` is any JSON-serialisable value: a message string for local
    failures, or the ``error`` member of the API response.
    """

    def __init__(self, detail):
        super().__init__(detail)
        self.detail = detail


def guess_image_mime(image_path: str) -> str:
    """Return the image MIME type implied by *image_path*'s extension.

    Falls back to ``image/jpeg`` when the extension is unknown or does not
    map to an ``image/*`` type.
    """
    guessed, _encoding = mimetypes.guess_type(image_path)
    if guessed and guessed.startswith("image/"):
        return guessed
    return DEFAULT_IMAGE_MIME


def build_payload(image_b64: str, mime_type: str) -> dict:
    """Build the chat-completions request body for one base64-encoded image."""
    return {
        "model": MODEL,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": PROMPT},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:{mime_type};base64,{image_b64}"},
                    },
                ],
            }
        ],
        "max_tokens": 200,
    }


def extract_reply_text(data) -> str:
    """Return the stripped text of the first choice in an API response.

    Raises:
        ReceiptOcrError: if *data* has no ``choices[0].message.content``
            string (e.g. empty ``choices`` or ``content`` is null).
    """
    try:
        content = data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as exc:
        raise ReceiptOcrError("unexpected response: no message content") from exc
    if not isinstance(content, str):
        raise ReceiptOcrError("unexpected response: message content is not text")
    return content.strip()


def strip_code_fence(text: str) -> str:
    """Remove a surrounding markdown code fence from *text*, if present.

    The whole opening fence line (including a language tag such as
    ``json``) is dropped, then a trailing fence, then outer whitespace.
    """
    if text.startswith("```"):
        # Drop everything up to the first newline; with no newline the text
        # is left unchanged by this step.
        text = text.split("\n", 1)[-1]
    if text.endswith("```"):
        text = text[:-3]
    return text.strip()


def parse_model_reply(text: str):
    """Parse the model's reply as JSON.

    Returns the decoded JSON value, or ``{"raw": <unfenced text>}`` when the
    reply is not valid JSON so the caller still gets machine-readable output.
    """
    unfenced = strip_code_fence(text)
    try:
        return json.loads(unfenced)
    except json.JSONDecodeError:
        return {"raw": unfenced}


def request_ocr(image_path: str, api_key: str) -> str:
    """Send the image at *image_path* to OpenRouter and return the reply text.

    Raises:
        ReceiptOcrError: if the image cannot be read, the HTTP request
            fails, the body is not JSON, the API reports an error, or the
            response carries no message content.
    """
    try:
        with open(image_path, "rb") as image_file:
            image_b64 = base64.b64encode(image_file.read()).decode("ascii")
    except OSError as exc:
        raise ReceiptOcrError(f"cannot read image: {exc}") from exc

    # Imported here so the pure helpers above stay importable (and testable)
    # on machines that do not have the third-party `requests` package.
    import requests

    try:
        resp = requests.post(
            API_URL,
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            },
            json=build_payload(image_b64, guess_image_mime(image_path)),
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as exc:
        raise ReceiptOcrError(f"request failed: {exc}") from exc

    try:
        data = resp.json()
    except ValueError as exc:
        # e.g. an HTML error page from a proxy or gateway.
        raise ReceiptOcrError(
            f"non-JSON response (HTTP {resp.status_code})"
        ) from exc

    if isinstance(data, dict) and "error" in data:
        raise ReceiptOcrError(data["error"])
    return extract_reply_text(data)


def main(argv=None) -> int:
    """Run the CLI; return the process exit status (0 success, 1 failure).

    *argv* is the argument list without the program name and defaults to
    ``sys.argv[1:]``. Only the first argument (the image path) is used.
    """
    args = sys.argv[1:] if argv is None else argv
    try:
        if not args:
            raise ReceiptOcrError("usage: IMAGE_PATH argument is required")
        api_key = os.environ.get(API_KEY_ENV)
        if not api_key:
            raise ReceiptOcrError(f"environment variable {API_KEY_ENV} is not set")
        reply = request_ocr(args[0], api_key)
    except ReceiptOcrError as exc:
        print(json.dumps({"error": exc.detail}), file=sys.stderr)
        return 1
    print(json.dumps(parse_model_reply(reply)))
    return 0


if __name__ == "__main__":
    sys.exit(main())