"""Flask proxy that forwards chat requests to a local Ollama server.

Endpoints:
    POST /chat    Forward a chat request; optionally stream the reply as SSE.
    GET  /health  Report whether the Ollama server is reachable.
    GET  /models  Return Ollama's model list.
"""

import atexit
import json
import signal
import sys

import requests
from flask import Flask, request, jsonify, Response, stream_with_context

app = Flask(__name__)

OLLAMA_URL = "http://localhost:11434/api/chat"
OLLAMA_TAGS_URL = "http://localhost:11434/api/tags"

# Marker sent as the last server-sent event of every streamed reply.
_SSE_DONE = "data: [DONE]\n\n"


def shutdown(*args):
    """Signal handler: exit with status 0 (SystemExit lets atexit hooks run)."""
    sys.exit(0)


signal.signal(signal.SIGTERM, shutdown)
signal.signal(signal.SIGINT, shutdown)
atexit.register(lambda: print("[engine] shutting down", flush=True))


def _sse(obj):
    """Encode *obj* as a single server-sent-event ``data:`` frame."""
    return f"data: {json.dumps(obj)}\n\n"


def _stream_chat(payload):
    """Yield SSE frames for a streamed Ollama chat response.

    Emits ``{"thinking": ...}`` and ``{"content": ...}`` frames as chunks
    arrive. Any failure (transport error, malformed chunk, or an ``error``
    field reported by Ollama) is sent as a ``{"content": "[Error: ...]"}``
    frame. The stream always ends with exactly one ``[DONE]`` frame.
    """
    try:
        with requests.post(
            OLLAMA_URL, json=payload, stream=True, timeout=600
        ) as r:
            for line in r.iter_lines():
                if not line:
                    continue
                chunk = json.loads(line)

                # Ollama reports failures (e.g. unknown model) as an
                # {"error": ...} object with no "message"/"done" keys.
                error = chunk.get("error")
                if error:
                    yield _sse({"content": f"[Error: {error}]"})
                    break

                msg = chunk.get("message") or {}

                # Thinking tokens are sent as a separate frame type.
                thinking = msg.get("thinking", "")
                if thinking:
                    yield _sse({"thinking": thinking})

                content = msg.get("content", "")
                if content:
                    yield _sse({"content": content})

                if chunk.get("done"):
                    break
    except Exception as e:
        # Boundary handler: headers are already sent, so the only way to
        # report a failure is in-band.
        yield _sse({"content": f"[Error: {str(e)}]"})
    # Sent on every path so clients waiting for the terminator never hang.
    yield _SSE_DONE


@app.route("/chat", methods=["POST"])
def chat():
    """Forward a chat request to Ollama.

    Expects a JSON object body with optional keys ``messages``, ``stream``,
    ``model``, ``temperature`` and ``think``.

    Returns:
        * ``stream`` truthy: a ``text/event-stream`` response.
        * otherwise: JSON ``{"content": ..., "thinking": ...}``.
        * 400 if the body is not a JSON object, 503 if Ollama cannot be
          reached or returns non-JSON, 502 if Ollama reports an error.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "request body must be a JSON object"}), 400

    stream = data.get("stream", False)
    payload = {
        "model": data.get("model", ""),
        "messages": data.get("messages", []),
        "stream": stream,
        "think": data.get("think", False),
        "options": {
            "temperature": data.get("temperature", 0.7),
        },
    }

    if stream:
        return Response(
            stream_with_context(_stream_chat(payload)),
            mimetype="text/event-stream",
        )

    try:
        r = requests.post(OLLAMA_URL, json=payload, timeout=600)
        result = r.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decode failures on older requests versions.
        return jsonify({"error": str(e)}), 503

    if not isinstance(result, dict):
        return jsonify({"error": "unexpected response from Ollama"}), 502
    if result.get("error"):
        return jsonify({"error": str(result["error"])}), 502

    msg = result.get("message") or {}
    return jsonify({
        "content": msg.get("content", ""),
        "thinking": msg.get("thinking", ""),
    })


@app.route("/health", methods=["GET"])
def health():
    """Report service status and whether the Ollama tags endpoint responds."""
    try:
        requests.get(OLLAMA_TAGS_URL, timeout=2)
        ollama_ok = True
    except requests.RequestException:
        ollama_ok = False
    return jsonify({"status": "ok", "ollama": ollama_ok})


@app.route("/models", methods=["GET"])
def models():
    """Return Ollama's tags (model list) JSON, or a 503 with the error."""
    try:
        r = requests.get(OLLAMA_TAGS_URL, timeout=5)
        return jsonify(r.json())
    except (requests.RequestException, ValueError) as e:
        return jsonify({"error": str(e)}), 503


if __name__ == "__main__":
    print("[engine] starting on port 5001", flush=True)
    app.run(port=5001, debug=False, use_reloader=False)