#!/usr/bin/env python3
"""
JarvisChat - Lightweight Ollama Coding Companion

A minimal replacement for Open-WebUI that actually runs on Python 3.13
Talks to Ollama API on localhost:11434

Features:
- Persistent profile/memory injected into every conversation
- Saved system prompt presets (coding assistant, sysadmin, general, custom)
- Streaming chat with conversation history
- Model switching between all installed Ollama models
- Copy-to-clipboard on code blocks
- Token count estimates
- SearXNG integration for web search when model is uncertain
"""

import json
import logging
import logging.handlers
import math
import re
import sqlite3
import uuid
from contextlib import asynccontextmanager
from datetime import datetime, timezone
from pathlib import Path

import httpx
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse

# --- Logging Setup ---
log = logging.getLogger("jarvischat")
log.setLevel(logging.DEBUG)
try:
    # Prefer the local syslog socket so messages land in the system journal.
    _handler = logging.handlers.SysLogHandler(address="/dev/log")
    _handler.setFormatter(
        logging.Formatter("jarvischat[%(process)d]: %(levelname)s %(message)s")
    )
except OSError:
    # /dev/log is absent (containers, non-Linux hosts): fall back to stderr
    # instead of crashing at import time.
    _handler = logging.StreamHandler()
    _handler.setFormatter(
        logging.Formatter("jarvischat: %(levelname)s %(message)s")
    )
log.addHandler(_handler)

# --- Configuration ---
VERSION = "1.3.0"
OLLAMA_BASE = "http://localhost:11434"
SEARXNG_BASE = "http://localhost:8888"
DB_PATH = Path(__file__).parent / "jarvischat.db"
DEFAULT_MODEL = "deepseek-coder:6.7b"

# --- Perplexity Threshold ---
# Higher perplexity = model is less confident / more uncertain
# Tune this based on your models. Start conservative (higher threshold).
PERPLEXITY_THRESHOLD = 15.0 # --- Refusal Patterns (fallback for confident refusals) --- REFUSAL_PATTERNS = re.compile(r"|".join([ r"i don'?t have (?:real-?time|current|live)", r"i (?:can'?t|cannot) provide (?:current|real-?time|live)", r"i don'?t have access to (?:current|real-?time|live)", r"(?:current|live|real-?time) (?:data|information|prices?|weather)", r"my (?:knowledge|training) (?:cutoff|only goes|ends)", r"as of my (?:knowledge|training) cutoff", r"i'?m not able to (?:access|provide|browse)", r"(?:check|visit|use) a (?:website|financial|news)", ]), re.IGNORECASE) # --- Hedging patterns to strip from search-augmented responses --- HEDGE_PATTERNS = [ r"^I'?m sorry,?\s*but\s*I\s*(?:can'?t|cannot)\s*assist\s*with\s*that[^.]*\.\s*", r"^I'?m sorry,?\s*but[^.]*(?:previous|incorrect)[^.]*\.\s*", r"(?:But\s+)?[Pp]lease\s+(?:make\s+sure\s+to\s+)?verify\s+(?:the\s+)?(?:data|information|this)\s+(?:from\s+)?(?:reliable\s+)?sources[^.]*\.\s*", r"[Pp]lease\s+verify[^.]*(?:accurate|reliability)[^.]*\.\s*", r"[Bb]ut\s+please\s+(?:make\s+sure|verify|check)[^.]*\.\s*", ] def clean_hedging(text: str) -> str: """Remove hedging sentences from model response.""" cleaned = text for pattern in HEDGE_PATTERNS: cleaned = re.sub(pattern, "", cleaned, flags=re.IGNORECASE) return cleaned.strip() def format_direct_answer(question: str, results: list[dict]) -> str: """Format search results directly when model refuses to help.""" if not results: return "No search results found." lines = [f"Here's what I found:\n"] for r in results[:3]: # Top 3 results lines.append(f"**{r['title']}**") if r['content']: lines.append(f"{r['content']}") lines.append("") return "\n".join(lines).strip() # --- Default Profile --- DEFAULT_PROFILE = """You are a coding companion running locally on a machine called "jarvis". 
## Environment - jarvis: Debian 13 (trixie) x86_64, AMD Ryzen 5 5600X, 16GB RAM, AMD RX 6600 XT (8GB VRAM), IP varies - llamadev: Windows 11, primary development machine, IP 192.168.50.108, user "alphaalpaca" - Corsair: Windows 11, gaming/streaming rig - pivault: RPi 5, 8GB RAM, Debian 13, 11TB RAID5 NAS at /mnt/pivault, IP 192.168.50.159 - Router: ASUS ROG Rapture GT-BE98 Pro "BigBlinkyRouter" at 192.168.50.1 - Ollama runs on jarvis with GPU acceleration (ROCm), serving models on port 11434 ## About the User - Experienced developer, BS in Computer Science (Oklahoma State), coding since 1981 (TRS-80) - Deep Unix/Linux background — wrote device drivers at SCO during Xenix era (1990s) - Currently learning Rust, transitioning from decades of PHP - Building a WW2 mobile game in Godot Engine for Android - Runs a YouTube series: "Building a Professional Dev Environment with Local AI" - Working on "Sysadmin's Wizard's Notebook" app concept in Rust - Veteran on fixed income — prefers free/open-source solutions - Home lab enthusiast with Z-Wave and Tapo smart home devices - Streams Fortnite on a regular schedule ## How to Respond - Be direct and concise — no hand-holding, this user knows what they're doing - When showing code, prefer complete working examples over snippets - Default to command-line solutions over GUI when possible - Consider resource constraints (fixed income, specific hardware limits) - Use Rust, Python, or bash unless another language is specifically needed - Explain trade-offs when multiple approaches exist - Don't repeat information the user clearly already knows""" # --- Default System Prompt Presets --- DEFAULT_PRESETS = [ { "name": "Coding Companion", "prompt": "You are a senior software engineer and coding companion. Focus on writing clean, efficient, well-documented code. Provide complete working examples. Explain architectural decisions and trade-offs. Prefer Rust, Python, and bash." 
}, { "name": "Linux Sysadmin", "prompt": "You are an experienced Linux systems administrator. Focus on command-line solutions, systemd services, networking, storage, and security. Prefer Debian/Ubuntu conventions. Be concise and direct." }, { "name": "General Assistant", "prompt": "You are a helpful general-purpose assistant. Be clear and concise." } ] # --- Database Setup --- def init_db(): conn = sqlite3.connect(DB_PATH) conn.row_factory = sqlite3.Row conn.execute(""" CREATE TABLE IF NOT EXISTS conversations ( id TEXT PRIMARY KEY, title TEXT NOT NULL DEFAULT 'New Chat', model TEXT NOT NULL, created_at TEXT NOT NULL, updated_at TEXT NOT NULL ) """) conn.execute(""" CREATE TABLE IF NOT EXISTS messages ( id INTEGER PRIMARY KEY AUTOINCREMENT, conversation_id TEXT NOT NULL, role TEXT NOT NULL, content TEXT NOT NULL, created_at TEXT NOT NULL, FOREIGN KEY (conversation_id) REFERENCES conversations(id) ON DELETE CASCADE ) """) conn.execute(""" CREATE TABLE IF NOT EXISTS system_presets ( id TEXT PRIMARY KEY, name TEXT NOT NULL, prompt TEXT NOT NULL, is_default INTEGER NOT NULL DEFAULT 0, created_at TEXT NOT NULL ) """) conn.execute(""" CREATE TABLE IF NOT EXISTS profile ( id INTEGER PRIMARY KEY CHECK (id = 1), content TEXT NOT NULL, updated_at TEXT NOT NULL ) """) conn.execute(""" CREATE TABLE IF NOT EXISTS settings ( key TEXT PRIMARY KEY, value TEXT NOT NULL ) """) # Seed default profile if empty existing = conn.execute("SELECT id FROM profile WHERE id = 1").fetchone() if not existing: now = datetime.now(timezone.utc).isoformat() conn.execute("INSERT INTO profile (id, content, updated_at) VALUES (1, ?, ?)", (DEFAULT_PROFILE, now)) # Seed default presets if empty existing_presets = conn.execute("SELECT COUNT(*) as c FROM system_presets").fetchone() if existing_presets["c"] == 0: now = datetime.now(timezone.utc).isoformat() for preset in DEFAULT_PRESETS: conn.execute( "INSERT INTO system_presets (id, name, prompt, is_default, created_at) VALUES (?, ?, ?, 1, ?)", 
def get_db():
    """Return a SQLite connection with dict-style rows and FK enforcement on."""
    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    conn.execute("PRAGMA foreign_keys = ON")
    return conn


# --- SearXNG Integration ---
async def query_searxng(query: str, max_results: int = 5) -> list[dict]:
    """Query SearXNG and return search results.

    Weather/temperature questions short-circuit to wttr.in for a one-line
    report; everything else goes through the local SearXNG instance.
    Returns a list of {"title", "url", "content"} dicts, or [] on failure.
    """
    log.info(f"Querying SearXNG: '{query}'")
    async with httpx.AsyncClient() as client:
        # For weather queries, hit wttr.in directly
        weather_match = re.search(
            r"(?:weather|temperature|forecast)\s+(?:in\s+)?(.+?)(?:\s+right now|\s+today|\s+degrees)?$",
            query,
            re.IGNORECASE,
        )
        if weather_match or "weather" in query.lower() or "temperature" in query.lower():
            if weather_match:
                location = weather_match.group(1)
            else:
                # No structured match — strip the weather keywords and use the rest.
                location = re.sub(
                    r"(weather|temperature|forecast|right now|today|degrees)",
                    "",
                    query,
                    flags=re.IGNORECASE,
                ).strip()
            if location:
                try:
                    log.info(f"Fetching weather for: {location}")
                    weather_resp = await client.get(
                        f"https://wttr.in/{location}?format=3",
                        timeout=10.0,
                        headers={"User-Agent": "curl/7.68.0"},
                    )
                    if weather_resp.status_code == 200:
                        weather_text = weather_resp.text.strip()
                        log.info(f"wttr.in returned: {weather_text}")
                        return [{
                            "title": "Current Weather",
                            "url": f"https://wttr.in/{location}",
                            "content": weather_text,
                        }]
                except Exception as e:
                    log.warning(f"wttr.in error: {e}, falling back to SearXNG")
        try:
            resp = await client.get(
                f"{SEARXNG_BASE}/search",
                params={"q": query, "format": "json", "categories": "general"},
                timeout=10.0,
            )
            if resp.status_code == 200:
                data = resp.json()
                collected = []
                # Direct answers / infoboxes first — highest-signal results.
                for answer in data.get("answers") or []:
                    collected.append({
                        "title": "Direct Answer",
                        "url": "",
                        "content": answer,
                    })
                    log.info(f"Got direct answer: {answer[:100]}")
                for box in data.get("infoboxes") or []:
                    content = box.get("content", "")
                    if not content and box.get("attributes"):
                        content = " | ".join(
                            [f"{a.get('label','')}: {a.get('value','')}" for a in box["attributes"]]
                        )
                    collected.append({
                        "title": box.get("infobox", "Info"),
                        "url": box.get("urls", [{}])[0].get("url", "") if box.get("urls") else "",
                        "content": content,
                    })
                    log.info(f"Got infobox: {box.get('infobox', '')}")
                # Then regular results
                for r in data.get("results", [])[:max_results]:
                    collected.append({
                        "title": r.get("title", ""),
                        "url": r.get("url", ""),
                        "content": r.get("content", ""),
                    })
                log.info(f"SearXNG returned {len(collected)} total results")
                for i, r in enumerate(collected[:5]):
                    log.debug(f" Result {i+1}: {r['title'][:60]}")
                return collected
            else:
                log.warning(f"SearXNG returned status {resp.status_code}")
        except Exception as e:
            log.error(f"SearXNG error: {e}")
    return []
def calculate_perplexity(logprobs: list) -> float:
    """Calculate perplexity from logprobs. Higher = less confident.

    Perplexity = exp(-mean(logprob)); an empty list yields 0.0 so callers
    can treat "no data" as "confident".
    """
    if not logprobs:
        return 0.0
    mean_logprob = sum(entry["logprob"] for entry in logprobs) / len(logprobs)
    return math.exp(-mean_logprob)
def is_uncertain(logprobs: list, threshold: float = PERPLEXITY_THRESHOLD) -> bool:
    """Check if model output indicates uncertainty based on perplexity."""
    if not logprobs:
        log.debug("No logprobs returned, skipping uncertainty check")
        return False
    perplexity = calculate_perplexity(logprobs)
    log.info(f"Perplexity: {perplexity:.2f} (threshold: {threshold})")
    return perplexity > threshold


def is_refusal(text: str) -> bool:
    """Check if model is refusing/admitting it can't help."""
    hit = REFUSAL_PATTERNS.search(text)
    if hit is None:
        return False
    log.info(f"Refusal detected: '{hit.group()}'")
    return True


def format_search_results(results: list[dict]) -> str:
    """Format search results as context for the model.

    Produces a numbered "[LIVE WEB DATA]" block followed by an instruction
    telling the model to answer without disclaimers. Empty input -> "".
    """
    if not results:
        return ""
    lines = ["[LIVE WEB DATA]\n"]
    for i, r in enumerate(results, 1):
        lines.append(f"{i}. {r['title']}")
        if r['content']:
            lines.append(f" {r['content']}")
        lines.append("")
    lines.append("\nAnswer directly using the data above. No apologies. No disclaimers. No \"please verify elsewhere.\" Just answer.")
    return "\n".join(lines)
def extract_search_query(user_message: str) -> str:
    """Extract a good search query from the user's message.

    Heuristics: weather/price questions get qualifier keywords appended,
    leading question words and trailing punctuation are stripped, and the
    query is capped at 100 characters. Falls back to the raw message prefix
    if stripping leaves nothing.
    """
    query = user_message.strip()
    # For temperature/weather queries, be more specific
    if re.search(r"temperature|weather", query, re.IGNORECASE):
        query = re.sub(r"^what('?s| is) the ", "", query, flags=re.IGNORECASE)
        query = query + " right now degrees"
    # For price queries, be more specific
    if re.search(r"price|spot price", query, re.IGNORECASE):
        query = re.sub(r"^(what('?s| is)|can you tell me) the ", "", query, flags=re.IGNORECASE)
        query = query + " today USD"
    # Remove common question words
    query = re.sub(
        r"^(what|who|where|when|why|how|is|are|can|could|would|should|do|does|did)\s+",
        "",
        query,
        flags=re.IGNORECASE,
    )
    # Remove trailing punctuation
    query = re.sub(r"[?!.]+$", "", query)
    # Limit length
    if len(query) > 100:
        query = query[:100]
    return query.strip() or user_message[:100]


# --- App Lifecycle ---
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Initialize the database on startup; log lifecycle transitions."""
    log.info(f"JarvisChat v{VERSION} starting up")
    log.info(f"Ollama: {OLLAMA_BASE}")
    log.info(f"SearXNG: {SEARXNG_BASE}")
    init_db()
    yield
    log.info("JarvisChat shutting down")


app = FastAPI(title="JarvisChat", lifespan=lifespan)


# --- API Routes ---
@app.get("/", response_class=HTMLResponse)
async def index():
    """Serve the single-page UI with the version stamped in."""
    return HTML_PAGE.replace("{{VERSION}}", VERSION)


@app.get("/api/models")
async def list_models():
    """Proxy Ollama's installed-model list (/api/tags)."""
    async with httpx.AsyncClient() as client:
        try:
            resp = await client.get(f"{OLLAMA_BASE}/api/tags", timeout=10)
            return resp.json()
        except httpx.HTTPError:
            # Broadened from ConnectError: timeouts and protocol errors should
            # also surface as a 502, not an unhandled 500.
            raise HTTPException(status_code=502, detail="Cannot connect to Ollama. Is it running?")
@app.get("/api/ps")
async def running_models():
    """Proxy Ollama's currently-loaded-model list (/api/ps)."""
    async with httpx.AsyncClient() as client:
        try:
            resp = await client.get(f"{OLLAMA_BASE}/api/ps", timeout=10)
            return resp.json()
        except httpx.HTTPError:
            # Broadened from ConnectError so timeouts also map to 502.
            raise HTTPException(status_code=502, detail="Cannot connect to Ollama.")


@app.post("/api/show")
async def show_model(request: Request):
    """Get model information including context size."""
    body = await request.json()
    async with httpx.AsyncClient() as client:
        try:
            resp = await client.post(f"{OLLAMA_BASE}/api/show", json=body, timeout=10)
            return resp.json()
        except httpx.HTTPError:
            raise HTTPException(status_code=502, detail="Cannot connect to Ollama.")


# --- Search Status ---
@app.get("/api/search/status")
async def search_status():
    """Check if SearXNG is available."""
    async with httpx.AsyncClient() as client:
        try:
            resp = await client.get(f"{SEARXNG_BASE}/healthz", timeout=5)
            return {"available": resp.status_code == 200}
        except Exception:
            # Was a bare `except:` — that also swallowed SystemExit and
            # KeyboardInterrupt. Try a simple search as fallback health check.
            try:
                resp = await client.get(
                    f"{SEARXNG_BASE}/search",
                    params={"q": "test", "format": "json"},
                    timeout=5,
                )
                return {"available": resp.status_code == 200}
            except Exception:
                return {"available": False}


# --- Profile ---
@app.get("/api/profile")
async def get_profile():
    """Return the persistent profile text injected into every chat."""
    db = get_db()
    row = db.execute("SELECT content, updated_at FROM profile WHERE id = 1").fetchone()
    db.close()
    if row:
        return {"content": row["content"], "updated_at": row["updated_at"]}
    return {"content": "", "updated_at": ""}


@app.put("/api/profile")
async def update_profile(request: Request):
    """Overwrite the profile text with body["content"]."""
    body = await request.json()
    now = datetime.now(timezone.utc).isoformat()
    db = get_db()
    db.execute(
        "UPDATE profile SET content = ?, updated_at = ? WHERE id = 1",
        (body["content"], now),
    )
    db.commit()
    db.close()
    return {"status": "ok", "updated_at": now}
@app.get("/api/profile/default")
async def get_default_profile():
    """Return the built-in profile text so the UI can offer a reset."""
    return {"content": DEFAULT_PROFILE}


# --- Settings ---
@app.get("/api/settings")
async def get_settings():
    """Return all settings as a flat key -> value map."""
    conn = get_db()
    rows = conn.execute("SELECT key, value FROM settings").fetchall()
    conn.close()
    return {row["key"]: row["value"] for row in rows}


@app.put("/api/settings")
async def update_settings(request: Request):
    """Upsert every key/value pair in the request body (values stringified)."""
    body = await request.json()
    conn = get_db()
    for key, value in body.items():
        conn.execute("INSERT OR REPLACE INTO settings (key, value) VALUES (?, ?)", (key, str(value)))
    conn.commit()
    conn.close()
    return {"status": "ok"}


# --- System Presets ---
@app.get("/api/presets")
async def list_presets():
    """List presets — built-in ones first, then alphabetically."""
    conn = get_db()
    rows = conn.execute("SELECT * FROM system_presets ORDER BY is_default DESC, name ASC").fetchall()
    conn.close()
    return [dict(r) for r in rows]


@app.post("/api/presets")
async def create_preset(request: Request):
    """Create a user preset (always is_default = 0)."""
    body = await request.json()
    preset_id = str(uuid.uuid4())
    now = datetime.now(timezone.utc).isoformat()
    conn = get_db()
    conn.execute(
        "INSERT INTO system_presets (id, name, prompt, is_default, created_at) VALUES (?, ?, ?, 0, ?)",
        (preset_id, body["name"], body["prompt"], now),
    )
    conn.commit()
    conn.close()
    return {"id": preset_id, "name": body["name"], "prompt": body["prompt"]}


@app.put("/api/presets/{preset_id}")
async def update_preset(preset_id: str, request: Request):
    """Rename / re-prompt an existing preset."""
    body = await request.json()
    conn = get_db()
    conn.execute(
        "UPDATE system_presets SET name = ?, prompt = ? WHERE id = ?",
        (body["name"], body["prompt"], preset_id),
    )
    conn.commit()
    conn.close()
    return {"status": "ok"}


@app.delete("/api/presets/{preset_id}")
async def delete_preset(preset_id: str):
    """Delete a user preset; built-ins (is_default = 1) are protected."""
    conn = get_db()
    conn.execute("DELETE FROM system_presets WHERE id = ? AND is_default = 0", (preset_id,))
    conn.commit()
    conn.close()
    return {"status": "ok"}
# --- Conversation CRUD ---
@app.get("/api/conversations")
async def list_conversations():
    """All conversations, most recently active first."""
    conn = get_db()
    rows = conn.execute("SELECT * FROM conversations ORDER BY updated_at DESC").fetchall()
    conn.close()
    return [dict(r) for r in rows]


@app.post("/api/conversations")
async def create_conversation(request: Request):
    """Create an empty conversation shell with optional title/model."""
    body = await request.json()
    conv_id = str(uuid.uuid4())
    now = datetime.now(timezone.utc).isoformat()
    model = body.get("model", DEFAULT_MODEL)
    title = body.get("title", "New Chat")
    conn = get_db()
    conn.execute(
        "INSERT INTO conversations (id, title, model, created_at, updated_at) VALUES (?, ?, ?, ?, ?)",
        (conv_id, title, model, now, now),
    )
    conn.commit()
    conn.close()
    return {"id": conv_id, "title": title, "model": model, "created_at": now, "updated_at": now}


@app.get("/api/conversations/{conv_id}")
async def get_conversation(conv_id: str):
    """Conversation metadata plus its messages in insertion order."""
    conn = get_db()
    conv = conn.execute("SELECT * FROM conversations WHERE id = ?", (conv_id,)).fetchone()
    if not conv:
        conn.close()
        raise HTTPException(status_code=404, detail="Conversation not found")
    messages = conn.execute(
        "SELECT * FROM messages WHERE conversation_id = ? ORDER BY id ASC", (conv_id,)
    ).fetchall()
    conn.close()
    return {"conversation": dict(conv), "messages": [dict(m) for m in messages]}


@app.put("/api/conversations/{conv_id}")
async def update_conversation(conv_id: str, request: Request):
    """Rename a conversation and/or switch its model; bumps updated_at."""
    body = await request.json()
    conn = get_db()
    now = datetime.now(timezone.utc).isoformat()
    if "title" in body:
        conn.execute(
            "UPDATE conversations SET title = ?, updated_at = ? WHERE id = ?",
            (body["title"], now, conv_id),
        )
    if "model" in body:
        conn.execute(
            "UPDATE conversations SET model = ?, updated_at = ? WHERE id = ?",
            (body["model"], now, conv_id),
        )
    conn.commit()
    conn.close()
    return {"status": "ok"}
@app.delete("/api/conversations/{conv_id}")
async def delete_conversation(conv_id: str):
    """Drop a conversation and its messages."""
    conn = get_db()
    conn.execute("DELETE FROM messages WHERE conversation_id = ?", (conv_id,))
    conn.execute("DELETE FROM conversations WHERE id = ?", (conv_id,))
    conn.commit()
    conn.close()
    return {"status": "ok"}


@app.delete("/api/conversations")
async def delete_all_conversations():
    """Wipe every conversation and message."""
    conn = get_db()
    conn.execute("DELETE FROM messages")
    conn.execute("DELETE FROM conversations")
    conn.commit()
    conn.close()
    log.info("Deleted all conversations")
    return {"status": "ok"}


# --- Chat (streaming) ---
def build_system_prompt(db, extra_prompt=""):
    """Build the full system prompt: profile + preset/custom prompt"""
    parts = []
    # Check if profile is enabled
    settings = {row["key"]: row["value"] for row in db.execute("SELECT key, value FROM settings").fetchall()}
    if settings.get("profile_enabled", "true") == "true":
        profile = db.execute("SELECT content FROM profile WHERE id = 1").fetchone()
        if profile and profile["content"].strip():
            parts.append(profile["content"].strip())
    if extra_prompt and extra_prompt.strip():
        parts.append(extra_prompt.strip())
    return "\n\n---\n\n".join(parts) if parts else ""


@app.post("/api/chat")
async def chat(request: Request):
    """Stream a chat completion as SSE, with optional search augmentation.

    Flow: persist the user message, stream the model's reply token by token,
    then — if search is enabled and the reply looks uncertain or is a
    refusal — run a web search, re-ask with the results injected, and stream
    the cleaned augmented answer instead.
    """
    body = await request.json()
    conv_id = body.get("conversation_id")
    user_message = body.get("message", "").strip()
    model = body.get("model", DEFAULT_MODEL)
    preset_prompt = body.get("system_prompt", "")
    if not user_message:
        raise HTTPException(status_code=400, detail="Empty message")

    db = get_db()
    now = datetime.now(timezone.utc).isoformat()

    # Check if search is enabled
    settings = {row["key"]: row["value"] for row in db.execute("SELECT key, value FROM settings").fetchall()}
    search_enabled = settings.get("search_enabled", "true") == "true"
    log.debug(f"Chat request: model={model}, search_enabled={search_enabled}")

    # Auto-create conversation if needed
    if not conv_id:
        conv_id = str(uuid.uuid4())
        title = user_message[:80] + ("..." if len(user_message) > 80 else "")
        db.execute(
            "INSERT INTO conversations (id, title, model, created_at, updated_at) VALUES (?, ?, ?, ?, ?)",
            (conv_id, title, model, now, now),
        )
    else:
        db.execute("UPDATE conversations SET updated_at = ? WHERE id = ?", (now, conv_id))

    # Save user message
    db.execute(
        "INSERT INTO messages (conversation_id, role, content, created_at) VALUES (?, ?, ?, ?)",
        (conv_id, "user", user_message, now),
    )
    db.commit()

    # Build message history
    history_rows = db.execute(
        "SELECT role, content FROM messages WHERE conversation_id = ? ORDER BY id ASC",
        (conv_id,),
    ).fetchall()

    # Build system prompt (profile + preset)
    system_prompt = build_system_prompt(db, preset_prompt)
    db.close()

    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    for row in history_rows:
        messages.append({"role": row["role"], "content": row["content"]})

    ollama_payload = {
        "model": model,
        "messages": messages,
        "stream": True,
        "logprobs": True,
    }

    async def stream_response():
        full_response = []
        all_logprobs = []
        tokens_per_sec = 0.0
        async with httpx.AsyncClient() as client:
            try:
                async with client.stream(
                    "POST",
                    f"{OLLAMA_BASE}/api/chat",
                    json=ollama_payload,
                    timeout=httpx.Timeout(300.0, connect=10.0),
                ) as resp:
                    async for line in resp.aiter_lines():
                        if line.strip():
                            try:
                                chunk = json.loads(line)
                                if "message" in chunk and "content" in chunk["message"]:
                                    token = chunk["message"]["content"]
                                    full_response.append(token)
                                    yield f"data: {json.dumps({'token': token, 'conversation_id': conv_id})}\n\n"
                                # Collect logprobs
                                if "logprobs" in chunk and chunk["logprobs"]:
                                    all_logprobs.extend(chunk["logprobs"])
                                if chunk.get("done"):
                                    # Capture timing info from final chunk
                                    eval_count = chunk.get("eval_count", 0)
                                    eval_duration = chunk.get("eval_duration", 0)
                                    tokens_per_sec = (eval_count / (eval_duration / 1e9)) if eval_duration > 0 else 0
                                    break
                            except json.JSONDecodeError:
                                pass

                # Check for uncertainty and search if needed
                assistant_msg = "".join(full_response)
                perplexity = calculate_perplexity(all_logprobs) if all_logprobs else 0.0
                should_search = is_uncertain(all_logprobs) or is_refusal(assistant_msg)

                if search_enabled and should_search:
                    # Signal that we're searching
                    yield f"data: {json.dumps({'searching': True, 'conversation_id': conv_id})}\n\n"
                    # Query SearXNG
                    search_query = extract_search_query(user_message)
                    log.info(f"Extracted search query: '{search_query}'")
                    search_results = await query_searxng(search_query)

                    if search_results:
                        # Build augmented messages - inject search context, DON'T include the refusal
                        search_context = format_search_results(search_results)
                        # Rebuild: system prompt + search context + original user question
                        augmented_messages = []
                        if system_prompt:
                            augmented_messages.append({"role": "system", "content": system_prompt + "\n\n" + search_context})
                        else:
                            augmented_messages.append({"role": "system", "content": search_context})
                        # Add conversation history except the last user message (we'll re-add it)
                        for row in history_rows[:-1]:
                            augmented_messages.append({"role": row["role"], "content": row["content"]})
                        # Re-add the user question
                        augmented_messages.append({"role": "user", "content": user_message})

                        augmented_payload = {
                            "model": model,
                            "messages": augmented_messages,
                            "stream": True,
                        }

                        # Signal search results found - include actual results for debug
                        yield f"data: {json.dumps({'search_results': len(search_results), 'results_preview': [r['title'] for r in search_results], 'conversation_id': conv_id})}\n\n"
                        # Stream the augmented response
                        yield f"data: {json.dumps({'debug': 'Starting augmented response...', 'conversation_id': conv_id})}\n\n"

                        augmented_response = []
                        async with client.stream(
                            "POST",
                            f"{OLLAMA_BASE}/api/chat",
                            json=augmented_payload,
                            timeout=httpx.Timeout(300.0, connect=10.0),
                        ) as resp2:
                            async for line in resp2.aiter_lines():
                                if line.strip():
                                    try:
                                        chunk = json.loads(line)
                                        if "message" in chunk and "content" in chunk["message"]:
                                            token = chunk["message"]["content"]
                                            augmented_response.append(token)
                                        if chunk.get("done"):
                                            break
                                    except json.JSONDecodeError:
                                        pass

                        # Clean hedging from the response
                        raw_response = "".join(augmented_response)
                        if not raw_response.strip():
                            log.warning("Augmented response empty, falling back to original")
                            raw_response = assistant_msg
                        cleaned_response = clean_hedging(raw_response)
                        log.debug(f"Cleaned hedging: {len(raw_response)} -> {len(cleaned_response)} chars")

                        # If model STILL refuses after getting search data, format answer ourselves
                        if is_refusal(cleaned_response) or len(cleaned_response) < 20:
                            log.warning("Model refused even with search context, formatting direct answer")
                            cleaned_response = format_direct_answer(user_message, search_results)

                        # Send cleaned response as single chunk
                        yield f"data: {json.dumps({'token': cleaned_response, 'conversation_id': conv_id, 'augmented': True})}\n\n"

                        # Save the cleaned response
                        search_note = "\n\n---\n*🔍 Enhanced with web search results*"
                        saved_msg = cleaned_response + search_note
                        db2 = get_db()
                        db2.execute(
                            "INSERT INTO messages (conversation_id, role, content, created_at) VALUES (?, ?, ?, ?)",
                            (conv_id, "assistant", saved_msg, datetime.now(timezone.utc).isoformat()),
                        )
                        db2.commit()
                        db2.close()
                        yield f"data: {json.dumps({'done': True, 'conversation_id': conv_id, 'searched': True, 'perplexity': round(perplexity, 2), 'tokens_per_sec': round(tokens_per_sec, 1)})}\n\n"
                        return

                # No search needed - save original response
                db2 = get_db()
                db2.execute(
                    "INSERT INTO messages (conversation_id, role, content, created_at) VALUES (?, ?, ?, ?)",
                    (conv_id, "assistant", assistant_msg, datetime.now(timezone.utc).isoformat()),
                )
                db2.commit()
                db2.close()
                yield f"data: {json.dumps({'done': True, 'conversation_id': conv_id, 'perplexity': round(perplexity, 2), 'tokens_per_sec': round(tokens_per_sec, 1)})}\n\n"
            except httpx.ConnectError:
                yield f"data: {json.dumps({'error': 'Cannot connect to Ollama. Is it running?'})}\n\n"
            except Exception as e:
                yield f"data: {json.dumps({'error': str(e)})}\n\n"

    return StreamingResponse(stream_response(), media_type="text/event-stream")


# =====================================================================
# FRONTEND
# =====================================================================
# NOTE(review): the HTML/JS template below appears truncated/garbled in this
# copy of the file — only stray text fragments survive. Restore the full
# template from version control before shipping.
HTML_PAGE = r""" JarvisChat
Model

JarvisChat — your local coding companion.
Profile context is injected automatically.
Web search kicks in when the model is uncertain.
Pick a model and start building.

PRESET
-- / --
"""

if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8080)