assistant-skills/log-work-expense/scripts/classify_image.py

53 lines
1.5 KiB
Python

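#!/usr/bin/env python3
"""Classify an image as 'receipt' or 'odometer'.

Usage:
    python classify_image.py <image_path>

Prints exactly one word to stdout: "receipt" or "odometer". If the model's
reply contains neither word, prints "unknown: <reply>" to stderr and exits
with status 1.
"""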
import base64
import mimetypes
import os
import sys

import requests
# OpenRouter API key, sent as the Bearer token on every request.
# Secrets must never live in source control, so the key is read from the
# OPENROUTER_API_KEY environment variable; it is an empty string when unset.
# NOTE(review): the key that used to be hard-coded on this line has been
# exposed in the repository history and should be revoked and rotated.
API_KEY = os.environ.get("OPENROUTER_API_KEY", "")

# Model identifier sent as the "model" field of the chat-completions request.
MODEL = "google/gemini-2.5-flash-lite"
def _fail(message):
    """Print *message* to stderr and terminate the script with exit status 1."""
    print(message, file=sys.stderr)
    sys.exit(1)


def _guess_mime(path):
    """Return the image MIME type implied by *path*'s extension.

    Falls back to "image/png" when the extension is missing, unknown, or does
    not map to an image type.
    """
    mime, _ = mimetypes.guess_type(path)
    if mime and mime.startswith("image/"):
        return mime
    return "image/png"


def main():
    """Classify the image named on the command line and print the label.

    Reads the image path from ``sys.argv[1]``, sends the base64-encoded image
    to the OpenRouter chat-completions endpoint using ``API_KEY`` and
    ``MODEL``, and prints "receipt" or "odometer" to stdout.

    Every failure (wrong usage, unreadable file, empty API key, network or
    HTTP error, malformed response, unrecognised reply) is reported on stderr
    and ends the process with exit status 1.
    """
    if len(sys.argv) != 2:
        _fail("Usage: classify_image.py <image_path>")
    path = sys.argv[1]

    # Read the image first so a bad path fails fast, before any network call.
    try:
        with open(path, "rb") as f:
            b64 = base64.b64encode(f.read()).decode()
    except OSError as exc:
        _fail(f"error: cannot read {path}: {exc}")

    if not API_KEY:
        _fail("error: API_KEY is empty; cannot authenticate the request")

    mime = _guess_mime(path)
    try:
        resp = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"},
            json={
                "model": MODEL,
                "messages": [{"role": "user", "content": [
                    {"type": "text", "text": "Classify this image. Reply with exactly one word: receipt or odometer. Nothing else."},
                    {"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}}
                ]}],
                "max_tokens": 10
            },
            timeout=30,
        )
        # Surface 4xx/5xx responses here instead of as a KeyError on "choices".
        resp.raise_for_status()
        content = resp.json()["choices"][0]["message"]["content"]
    except requests.RequestException as exc:
        _fail(f"error: request failed: {exc}")
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        _fail(f"error: unexpected response: {exc!r}")

    # A missing or non-string reply is treated as an unrecognised answer.
    text = content.strip().lower() if isinstance(content, str) else ""
    if "receipt" in text:
        print("receipt")
    elif "odometer" in text:
        print("odometer")
    else:
        _fail(f"unknown: {text}")


if __name__ == "__main__":
    main()