From 46cccc90870582a7ada7719ecd43eae4e9791236 Mon Sep 17 00:00:00 2001 From: gramps Date: Mon, 9 Mar 2026 20:06:01 -0700 Subject: [PATCH] Initial commit --- .gitignore | 5 + app.py | 1702 ++++++++++++++++++++++++++++++++++++++++++++++ readme.md | 251 +++++++ requirements.txt | 3 + 4 files changed, 1961 insertions(+) create mode 100644 .gitignore create mode 100644 app.py create mode 100644 readme.md create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d2c5cc6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +*.db +*.png +*.py- +__pycache__/ +venv/ diff --git a/app.py b/app.py new file mode 100644 index 0000000..fd8938b --- /dev/null +++ b/app.py @@ -0,0 +1,1702 @@ +#!/usr/bin/env python3 +""" +JarvisChat - Lightweight Ollama Coding Companion +A minimal replacement for Open-WebUI that actually runs on Python 3.13 +Talks to Ollama API on localhost:11434 + +Features: + - Persistent profile/memory injected into every conversation + - Saved system prompt presets (coding assistant, sysadmin, general, custom) + - Streaming chat with conversation history + - Model switching between all installed Ollama models + - Copy-to-clipboard on code blocks + - Token count estimates + - SearXNG integration for web search when model is uncertain +""" + +import json +import logging +import math +import sqlite3 +import uuid +import re +from datetime import datetime, timezone +from pathlib import Path +from contextlib import asynccontextmanager + +import httpx +from fastapi import FastAPI, Request, HTTPException +from fastapi.responses import HTMLResponse, StreamingResponse, JSONResponse + +# --- Logging Setup --- +import logging.handlers + +log = logging.getLogger("jarvischat") +log.setLevel(logging.DEBUG) +syslog_handler = logging.handlers.SysLogHandler(address='/dev/log') +syslog_handler.setFormatter(logging.Formatter('jarvischat[%(process)d]: %(levelname)s %(message)s')) +log.addHandler(syslog_handler) + +# --- Configuration --- 
VERSION = "1.3.0"
OLLAMA_BASE = "http://localhost:11434"
SEARXNG_BASE = "http://localhost:8888"
DB_PATH = Path(__file__).parent / "jarvischat.db"
DEFAULT_MODEL = "deepseek-coder:6.7b"

# --- Perplexity Threshold ---
# Higher perplexity = model is less confident / more uncertain
# Tune this based on your models. Start conservative (higher threshold).
PERPLEXITY_THRESHOLD = 15.0

# --- Refusal Patterns (fallback for confident refusals) ---
# One case-insensitive alternation; a hit anywhere in the reply counts as a refusal.
REFUSAL_PATTERNS = re.compile(r"|".join([
    r"i don'?t have (?:real-?time|current|live)",
    r"i (?:can'?t|cannot) provide (?:current|real-?time|live)",
    r"i don'?t have access to (?:current|real-?time|live)",
    r"(?:current|live|real-?time) (?:data|information|prices?|weather)",
    r"my (?:knowledge|training) (?:cutoff|only goes|ends)",
    r"as of my (?:knowledge|training) cutoff",
    r"i'?m not able to (?:access|provide|browse)",
    r"(?:check|visit|use) a (?:website|financial|news)",
]), re.IGNORECASE)

# --- Hedging patterns to strip from search-augmented responses ---
# Applied in order by clean_hedging(); the "^" patterns only match at the very
# start of the text because re.MULTILINE is not used.
HEDGE_PATTERNS = [
    r"^I'?m sorry,?\s*but\s*I\s*(?:can'?t|cannot)\s*assist\s*with\s*that[^.]*\.\s*",
    r"^I'?m sorry,?\s*but[^.]*(?:previous|incorrect)[^.]*\.\s*",
    r"(?:But\s+)?[Pp]lease\s+(?:make\s+sure\s+to\s+)?verify\s+(?:the\s+)?(?:data|information|this)\s+(?:from\s+)?(?:reliable\s+)?sources[^.]*\.\s*",
    r"[Pp]lease\s+verify[^.]*(?:accurate|reliability)[^.]*\.\s*",
    r"[Bb]ut\s+please\s+(?:make\s+sure|verify|check)[^.]*\.\s*",
]


def clean_hedging(text: str) -> str:
    """Remove hedging sentences from a model response.

    Every pattern in HEDGE_PATTERNS is deleted (case-insensitively) and the
    result is stripped of surrounding whitespace.

    Args:
        text: Raw model output. ``None`` or an empty string yields ``""``.

    Returns:
        The text with all hedging sentences removed.
    """
    if not text:
        return ""
    cleaned = text
    for pattern in HEDGE_PATTERNS:
        cleaned = re.sub(pattern, "", cleaned, flags=re.IGNORECASE)
    return cleaned.strip()


def format_direct_answer(question: str, results: list[dict]) -> str:
    """Format search results directly when the model refuses to help.

    Args:
        question: The user's question. Currently unused; kept so callers
            do not have to change.
        results: Search result dicts with optional ``title`` and ``content``
            keys. Only the first three are rendered.

    Returns:
        A Markdown snippet, or ``"No search results found."`` when
        ``results`` is empty.
    """
    if not results:
        return "No search results found."

    lines = ["Here's what I found:\n"]
    for result in results[:3]:  # Top 3 results
        # .get() so a result missing a key degrades gracefully instead of
        # raising KeyError in the middle of a streamed response.
        lines.append(f"**{result.get('title', '')}**")
        content = result.get("content")
        if content:
            lines.append(f"{content}")
        lines.append("")

    return "\n".join(lines).strip()


# --- Default Profile ---
DEFAULT_PROFILE = """You are a coding companion running locally on a machine called "jarvis".

## Environment
- jarvis: Debian 13 (trixie) x86_64, AMD Ryzen 5 5600X, 16GB RAM, AMD RX 6600 XT (8GB VRAM), IP varies
- llamadev: Windows 11, primary development machine, IP 192.168.50.108, user "alphaalpaca"
- Corsair: Windows 11, gaming/streaming rig
- pivault: RPi 5, 8GB RAM, Debian 13, 11TB RAID5 NAS at /mnt/pivault, IP 192.168.50.159
- Router: ASUS ROG Rapture GT-BE98 Pro "BigBlinkyRouter" at 192.168.50.1
- Ollama runs on jarvis with GPU acceleration (ROCm), serving models on port 11434

## About the User
- Experienced developer, BS in Computer Science (Oklahoma State), coding since 1981 (TRS-80)
- Deep Unix/Linux background — wrote device drivers at SCO during Xenix era (1990s)
- Currently learning Rust, transitioning from decades of PHP
- Building a WW2 mobile game in Godot Engine for Android
- Runs a YouTube series: "Building a Professional Dev Environment with Local AI"
- Working on "Sysadmin's Wizard's Notebook" app concept in Rust
- Veteran on fixed income — prefers free/open-source solutions
- Home lab enthusiast with Z-Wave and Tapo smart home devices
- Streams Fortnite on a regular schedule

## How to Respond
- Be direct and concise — no hand-holding, this user knows what they're doing
- When showing code, prefer complete working examples over snippets
- Default to command-line solutions over GUI when possible
- Consider resource constraints (fixed income, specific hardware limits)
- Use Rust, Python, or bash unless another language is specifically needed
- Explain trade-offs when multiple approaches exist
- Don't repeat information the user clearly already knows"""
# --- Default System Prompt Presets ---
DEFAULT_PRESETS = [
    {
        "name": "Coding Companion",
        "prompt": "You are a senior software engineer and coding companion. Focus on writing clean, efficient, well-documented code. Provide complete working examples. Explain architectural decisions and trade-offs. Prefer Rust, Python, and bash."
    },
    {
        "name": "Linux Sysadmin",
        "prompt": "You are an experienced Linux systems administrator. Focus on command-line solutions, systemd services, networking, storage, and security. Prefer Debian/Ubuntu conventions. Be concise and direct."
    },
    {
        "name": "General Assistant",
        "prompt": "You are a helpful general-purpose assistant. Be clear and concise."
    }
]

# --- Database Setup ---
# Idempotent DDL, executed in order by init_db().
_SCHEMA_STATEMENTS = (
    """
    CREATE TABLE IF NOT EXISTS conversations (
        id TEXT PRIMARY KEY,
        title TEXT NOT NULL DEFAULT 'New Chat',
        model TEXT NOT NULL,
        created_at TEXT NOT NULL,
        updated_at TEXT NOT NULL
    )
    """,
    """
    CREATE TABLE IF NOT EXISTS messages (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        conversation_id TEXT NOT NULL,
        role TEXT NOT NULL,
        content TEXT NOT NULL,
        created_at TEXT NOT NULL,
        FOREIGN KEY (conversation_id) REFERENCES conversations(id) ON DELETE CASCADE
    )
    """,
    """
    CREATE TABLE IF NOT EXISTS system_presets (
        id TEXT PRIMARY KEY,
        name TEXT NOT NULL,
        prompt TEXT NOT NULL,
        is_default INTEGER NOT NULL DEFAULT 0,
        created_at TEXT NOT NULL
    )
    """,
    """
    CREATE TABLE IF NOT EXISTS profile (
        id INTEGER PRIMARY KEY CHECK (id = 1),
        content TEXT NOT NULL,
        updated_at TEXT NOT NULL
    )
    """,
    """
    CREATE TABLE IF NOT EXISTS settings (
        key TEXT PRIMARY KEY,
        value TEXT NOT NULL
    )
    """,
)


def init_db():
    """Create the SQLite schema and seed defaults; safe to call repeatedly.

    Seeds the single profile row, the built-in presets (only while the
    presets table is empty) and any missing settings keys. Existing rows are
    never overwritten. The connection is always closed, and all seeding runs
    in one transaction so a failure cannot leave a half-seeded database.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        conn.row_factory = sqlite3.Row
        with conn:  # commit on success, roll back on error
            for statement in _SCHEMA_STATEMENTS:
                conn.execute(statement)

            now = datetime.now(timezone.utc).isoformat()

            # Seed default profile if empty (id is pinned to 1 by the CHECK).
            conn.execute(
                "INSERT OR IGNORE INTO profile (id, content, updated_at) VALUES (1, ?, ?)",
                (DEFAULT_PROFILE, now),
            )

            # Seed default presets only into an empty table, so presets the
            # user deleted or renamed are not resurrected on every start.
            preset_count = conn.execute(
                "SELECT COUNT(*) as c FROM system_presets"
            ).fetchone()["c"]
            if preset_count == 0:
                conn.executemany(
                    "INSERT INTO system_presets (id, name, prompt, is_default, created_at) VALUES (?, ?, ?, 1, ?)",
                    [
                        (str(uuid.uuid4()), preset["name"], preset["prompt"], now)
                        for preset in DEFAULT_PRESETS
                    ],
                )

            # Default settings: only keys that do not exist yet are written.
            defaults = {
                "profile_enabled": "true",
                "default_model": DEFAULT_MODEL,
                "search_enabled": "true",
            }
            conn.executemany(
                "INSERT OR IGNORE INTO settings (key, value) VALUES (?, ?)",
                list(defaults.items()),
            )
    finally:
        conn.close()


def get_db():
    """Open a new connection to the app database.

    Rows are returned as ``sqlite3.Row`` and foreign-key enforcement is
    switched on for this connection. The caller must close it.
    """
    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    conn.execute("PRAGMA foreign_keys = ON")
    return conn
# --- SearXNG Integration ---
# Captures the location of a weather-style query. The trailing group is
# repeated so stacked qualifiers ("... right now degrees") are all dropped.
_WEATHER_QUERY_RE = re.compile(
    r"(?:weather|temperature|forecast)\s+(?:in\s+)?(.+?)(?:\s+(?:right now|today|degrees))*\s*$",
    re.IGNORECASE,
)
_WEATHER_NOISE_RE = re.compile(
    r"(weather|temperature|forecast|right now|today|degrees)", re.IGNORECASE
)


def _weather_location(query: str) -> str:
    """Return the location named in a weather query, or "" if there is none."""
    match = _WEATHER_QUERY_RE.search(query)
    if match:
        location = match.group(1)
    elif "weather" in query.lower() or "temperature" in query.lower():
        location = _WEATHER_NOISE_RE.sub("", query)
    else:
        return ""
    # Stray punctuation ("Tulsa?") would otherwise end up in the URL.
    return location.strip(" \t?!.,")


def _collect_searxng_results(data: dict, max_results: int) -> list[dict]:
    """Flatten a SearXNG JSON payload into title/url/content dicts.

    Direct answers come first, then infoboxes, then up to ``max_results``
    regular hits.
    """
    results = []

    # Check for direct answers/infoboxes first
    for answer in data.get("answers") or []:
        # Answers may be plain strings or objects carrying an "answer" field.
        if isinstance(answer, dict):
            text = str(answer.get("answer") or "")
        else:
            text = str(answer)
        results.append({
            "title": "Direct Answer",
            "url": "",
            "content": text,
        })
        log.info(f"Got direct answer: {text[:100]}")

    for box in data.get("infoboxes") or []:
        content = box.get("content", "")
        if not content and box.get("attributes"):
            content = " | ".join(
                f"{a.get('label','')}: {a.get('value','')}" for a in box["attributes"]
            )
        urls = box.get("urls") or [{}]
        results.append({
            "title": box.get("infobox", "Info"),
            "url": urls[0].get("url", ""),
            "content": content,
        })
        log.info(f"Got infobox: {box.get('infobox', '')}")

    # Then regular results
    for r in data.get("results", [])[:max_results]:
        results.append({
            "title": r.get("title", ""),
            "url": r.get("url", ""),
            "content": r.get("content", ""),
        })
    return results


async def query_searxng(query: str, max_results: int = 5) -> list[dict]:
    """Query SearXNG (or wttr.in for weather questions) and return results.

    Args:
        query: Search query text.
        max_results: Cap on regular SearXNG hits. Direct answers and
            infoboxes are returned in addition to these.

    Returns:
        A list of ``{"title", "url", "content"}`` dicts. Empty on any
        failure; errors are logged and never raised to the caller.
    """
    from urllib.parse import quote  # local import keeps this block self-contained

    log.info(f"Querying SearXNG: '{query}'")
    async with httpx.AsyncClient() as client:
        # For weather queries, hit wttr.in directly
        location = _weather_location(query)
        if location:
            # Percent-encode: the location is user text and may contain
            # spaces, "?", "#" or "/" that would otherwise change the URL.
            weather_url = f"https://wttr.in/{quote(location, safe='')}"
            try:
                log.info(f"Fetching weather for: {location}")
                resp = await client.get(
                    f"{weather_url}?format=3",
                    timeout=10.0,
                    headers={"User-Agent": "curl/7.68.0"},
                )
                if resp.status_code == 200:
                    weather_text = resp.text.strip()
                    log.info(f"wttr.in returned: {weather_text}")
                    return [{
                        "title": "Current Weather",
                        "url": weather_url,
                        "content": weather_text,
                    }]
            except Exception as e:  # best effort: fall through to SearXNG
                log.warning(f"wttr.in error: {e}, falling back to SearXNG")

        try:
            resp = await client.get(
                f"{SEARXNG_BASE}/search",
                params={
                    "q": query,
                    "format": "json",
                    "categories": "general",
                },
                timeout=10.0,
            )
            if resp.status_code == 200:
                results = _collect_searxng_results(resp.json(), max_results)
                log.info(f"SearXNG returned {len(results)} total results")
                for i, r in enumerate(results[:5]):
                    log.debug(f" Result {i+1}: {r['title'][:60]}")
                return results
            log.warning(f"SearXNG returned status {resp.status_code}")
        except Exception as e:  # search is optional; never break the chat
            log.error(f"SearXNG error: {e}")
    return []
def calculate_perplexity(logprobs: list) -> float:
    """Calculate perplexity from logprobs. Higher = less confident.

    Perplexity is ``exp(-mean(logprob))`` over the entries' ``"logprob"``
    values. An empty or missing list yields 0.0.
    """
    count = len(logprobs) if logprobs else 0
    if count == 0:
        return 0.0
    total = sum(entry["logprob"] for entry in logprobs)
    mean_logprob = total / count
    return math.exp(-mean_logprob)


def is_uncertain(logprobs: list, threshold: float = PERPLEXITY_THRESHOLD) -> bool:
    """Check if model output indicates uncertainty based on perplexity.

    Returns False when no logprobs are available; otherwise True when the
    perplexity is strictly greater than ``threshold``.
    """
    if logprobs:
        score = calculate_perplexity(logprobs)
        log.info(f"Perplexity: {score:.2f} (threshold: {threshold})")
        return score > threshold
    log.debug("No logprobs returned, skipping uncertainty check")
    return False


def is_refusal(text: str) -> bool:
    """Check if model is refusing/admitting it can't help."""
    hit = REFUSAL_PATTERNS.search(text)
    if hit is None:
        return False
    log.info(f"Refusal detected: '{hit.group()}'")
    return True


def format_search_results(results: list[dict]) -> str:
    """Format search results as context for the model.

    Produces a numbered list under a ``[LIVE WEB DATA]`` header followed by
    an instruction to answer directly. Returns "" when there are no results.
    """
    if not results:
        return ""

    parts = ["[LIVE WEB DATA]\n"]
    for number, item in enumerate(results, start=1):
        parts.append(f"{number}. {item['title']}")
        snippet = item['content']
        if snippet:
            parts.append(f" {snippet}")
        parts.append("")

    parts.append("\nAnswer directly using the data above. No apologies. No disclaimers. No \"please verify elsewhere.\" Just answer.")
    return "\n".join(parts)
def extract_search_query(user_message: str) -> str:
    """Extract a good search query from the user's message.

    Trailing punctuation is removed *before* the weather/price suffixes are
    appended, so "What's the weather in Tulsa?" becomes
    "weather in Tulsa right now degrees" rather than
    "weather in Tulsa? right now degrees".

    Args:
        user_message: The raw chat message.

    Returns:
        A query of at most 100 characters. Falls back to the first 100
        characters of the message if cleaning leaves nothing.
    """
    query = user_message.strip()

    # Strip trailing punctuation first; once a suffix is appended the "?"
    # is no longer at the end and would stay buried in the query.
    query = re.sub(r"[?!.]+$", "", query).rstrip()

    # For temperature/weather queries, be more specific
    if re.search(r"temperature|weather", query, re.IGNORECASE):
        query = re.sub(r"^what('?s| is) the ", "", query, flags=re.IGNORECASE)
        query = query + " right now degrees"

    # For price queries, be more specific
    if re.search(r"price|spot price", query, re.IGNORECASE):
        query = re.sub(r"^(what('?s| is)|can you tell me) the ", "", query, flags=re.IGNORECASE)
        query = query + " today USD"

    # Remove common question words
    query = re.sub(
        r"^(what|who|where|when|why|how|is|are|can|could|would|should|do|does|did)\s+",
        "",
        query,
        flags=re.IGNORECASE,
    )
    # Remove trailing punctuation
    query = re.sub(r"[?!.]+$", "", query)
    # Limit length
    if len(query) > 100:
        query = query[:100]
    return query.strip() or user_message[:100]


# --- App Lifecycle ---
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Log the configuration and initialise the database on startup."""
    log.info(f"JarvisChat v{VERSION} starting up")
    log.info(f"Ollama: {OLLAMA_BASE}")
    log.info(f"SearXNG: {SEARXNG_BASE}")
    init_db()
    yield
    log.info("JarvisChat shutting down")


app = FastAPI(title="JarvisChat", lifespan=lifespan)

# --- API Routes ---


@app.get("/", response_class=HTMLResponse)
async def index():
    """Serve the single-page UI with the version placeholder filled in."""
    return HTML_PAGE.replace("{{VERSION}}", VERSION)
async def _ollama_json(method: str, path: str, detail: str, **request_kwargs):
    """Call an Ollama endpoint and return its decoded JSON body.

    Any transport failure (refused connection, timeout, DNS, ...) becomes an
    HTTP 502 carrying ``detail``, instead of surfacing as an unhandled 500.
    """
    async with httpx.AsyncClient() as client:
        try:
            resp = await client.request(
                method, f"{OLLAMA_BASE}{path}", timeout=10, **request_kwargs
            )
        except httpx.RequestError as exc:
            raise HTTPException(status_code=502, detail=detail) from exc
    return resp.json()


@app.get("/api/models")
async def list_models():
    """List the models installed in Ollama."""
    return await _ollama_json("GET", "/api/tags", "Cannot connect to Ollama. Is it running?")


@app.get("/api/ps")
async def running_models():
    """List the models Ollama currently has loaded."""
    return await _ollama_json("GET", "/api/ps", "Cannot connect to Ollama.")


@app.post("/api/show")
async def show_model(request: Request):
    """Get model information including context size."""
    body = await request.json()
    return await _ollama_json("POST", "/api/show", "Cannot connect to Ollama.", json=body)


# --- Search Status ---
@app.get("/api/search/status")
async def search_status():
    """Check if SearXNG is available.

    Probes ``/healthz`` first. If that probe fails or is not a 200, a
    trivial search is tried as a fallback.
    """
    async with httpx.AsyncClient() as client:
        try:
            resp = await client.get(f"{SEARXNG_BASE}/healthz", timeout=5)
            if resp.status_code == 200:
                return {"available": True}
        except Exception as exc:
            # Not a bare except: task cancellation must still propagate.
            log.debug(f"SearXNG healthz probe failed: {exc}")

        # Try a simple search as fallback health check
        try:
            resp = await client.get(
                f"{SEARXNG_BASE}/search",
                params={"q": "test", "format": "json"},
                timeout=5,
            )
            return {"available": resp.status_code == 200}
        except Exception as exc:
            log.debug(f"SearXNG search probe failed: {exc}")
            return {"available": False}


# --- Profile ---


@app.get("/api/profile")
async def get_profile():
    """Return the stored profile text, or empty strings if none exists."""
    db = get_db()
    try:
        row = db.execute("SELECT content, updated_at FROM profile WHERE id = 1").fetchone()
    finally:
        db.close()
    if row:
        return {"content": row["content"], "updated_at": row["updated_at"]}
    return {"content": "", "updated_at": ""}
def _require_fields(body, *names):
    """Raise HTTP 400 unless ``body`` is a JSON object containing ``names``."""
    if not isinstance(body, dict):
        raise HTTPException(status_code=400, detail="JSON object expected")
    missing = [name for name in names if name not in body]
    if missing:
        raise HTTPException(status_code=400, detail=f"Missing field(s): {', '.join(missing)}")


def _setting_to_text(value) -> str:
    """Serialise a setting the way the rest of the app reads it.

    Booleans become "true"/"false". ``str(True)`` would store "True", which
    never equals the "true" that the feature toggles are compared against.
    """
    if isinstance(value, bool):
        return "true" if value else "false"
    return str(value)


@app.put("/api/profile")
async def update_profile(request: Request):
    """Replace the profile text. Expects ``{"content": ...}``."""
    body = await request.json()
    _require_fields(body, "content")
    now = datetime.now(timezone.utc).isoformat()
    db = get_db()
    try:
        db.execute("UPDATE profile SET content = ?, updated_at = ? WHERE id = 1",
                   (body["content"], now))
        db.commit()
    finally:
        db.close()
    return {"status": "ok", "updated_at": now}


@app.get("/api/profile/default")
async def get_default_profile():
    """Return the built-in default profile text."""
    return {"content": DEFAULT_PROFILE}


# --- Settings ---


@app.get("/api/settings")
async def get_settings():
    """Return all settings as a flat ``{key: value}`` object of strings."""
    db = get_db()
    try:
        rows = db.execute("SELECT key, value FROM settings").fetchall()
    finally:
        db.close()
    return {row["key"]: row["value"] for row in rows}


@app.put("/api/settings")
async def update_settings(request: Request):
    """Upsert every key/value pair in the request body."""
    body = await request.json()
    _require_fields(body)
    db = get_db()
    try:
        db.executemany(
            "INSERT OR REPLACE INTO settings (key, value) VALUES (?, ?)",
            [(key, _setting_to_text(value)) for key, value in body.items()],
        )
        db.commit()
    finally:
        db.close()
    return {"status": "ok"}


# --- System Presets ---


@app.get("/api/presets")
async def list_presets():
    """List presets, built-in ones first, then alphabetically."""
    db = get_db()
    try:
        rows = db.execute("SELECT * FROM system_presets ORDER BY is_default DESC, name ASC").fetchall()
    finally:
        db.close()
    return [dict(r) for r in rows]


@app.post("/api/presets")
async def create_preset(request: Request):
    """Create a user preset. Expects ``{"name": ..., "prompt": ...}``."""
    body = await request.json()
    _require_fields(body, "name", "prompt")
    preset_id = str(uuid.uuid4())
    now = datetime.now(timezone.utc).isoformat()
    db = get_db()
    try:
        db.execute(
            "INSERT INTO system_presets (id, name, prompt, is_default, created_at) VALUES (?, ?, ?, 0, ?)",
            (preset_id, body["name"], body["prompt"], now)
        )
        db.commit()
    finally:
        db.close()
    return {"id": preset_id, "name": body["name"], "prompt": body["prompt"]}


@app.put("/api/presets/{preset_id}")
async def update_preset(preset_id: str, request: Request):
    """Rename a preset and replace its prompt."""
    body = await request.json()
    _require_fields(body, "name", "prompt")
    db = get_db()
    try:
        db.execute("UPDATE system_presets SET name = ?, prompt = ? WHERE id = ?",
                   (body["name"], body["prompt"], preset_id))
        db.commit()
    finally:
        db.close()
    return {"status": "ok"}
@app.delete("/api/presets/{preset_id}")
async def delete_preset(preset_id: str):
    """Delete a user-created preset.

    Built-in presets (``is_default = 1``) are left untouched; the call
    still reports "ok".
    """
    db = get_db()
    try:
        db.execute("DELETE FROM system_presets WHERE id = ? AND is_default = 0", (preset_id,))
        db.commit()
    finally:
        db.close()
    return {"status": "ok"}


# --- Conversation CRUD ---


@app.get("/api/conversations")
async def list_conversations():
    """List all conversations, most recently updated first."""
    db = get_db()
    try:
        rows = db.execute("SELECT * FROM conversations ORDER BY updated_at DESC").fetchall()
    finally:
        db.close()
    return [dict(r) for r in rows]


@app.post("/api/conversations")
async def create_conversation(request: Request):
    """Create an empty conversation.

    Optional body fields: ``model`` (defaults to DEFAULT_MODEL) and
    ``title`` (defaults to "New Chat").
    """
    body = await request.json()
    conv_id = str(uuid.uuid4())
    now = datetime.now(timezone.utc).isoformat()
    model = body.get("model", DEFAULT_MODEL)
    title = body.get("title", "New Chat")
    db = get_db()
    try:
        db.execute(
            "INSERT INTO conversations (id, title, model, created_at, updated_at) VALUES (?, ?, ?, ?, ?)",
            (conv_id, title, model, now, now)
        )
        db.commit()
    finally:
        db.close()
    return {"id": conv_id, "title": title, "model": model, "created_at": now, "updated_at": now}


@app.get("/api/conversations/{conv_id}")
async def get_conversation(conv_id: str):
    """Return one conversation with its messages in insertion order.

    Raises:
        HTTPException: 404 if the conversation does not exist.
    """
    db = get_db()
    try:
        conv = db.execute("SELECT * FROM conversations WHERE id = ?", (conv_id,)).fetchone()
        if not conv:
            raise HTTPException(status_code=404, detail="Conversation not found")
        messages = db.execute(
            "SELECT * FROM messages WHERE conversation_id = ? ORDER BY id ASC", (conv_id,)
        ).fetchall()
    finally:
        # Runs on the 404 path too, so the handle is never leaked.
        db.close()
    return {"conversation": dict(conv), "messages": [dict(m) for m in messages]}
@app.put("/api/conversations/{conv_id}")
async def update_conversation(conv_id: str, request: Request):
    """Update a conversation's ``title`` and/or ``model``.

    Fields absent from the body are left unchanged. ``updated_at`` is
    refreshed whenever at least one field is written.
    """
    body = await request.json()
    now = datetime.now(timezone.utc).isoformat()
    db = get_db()
    try:
        if "title" in body:
            db.execute("UPDATE conversations SET title = ?, updated_at = ? WHERE id = ?",
                       (body["title"], now, conv_id))
        if "model" in body:
            db.execute("UPDATE conversations SET model = ?, updated_at = ? WHERE id = ?",
                       (body["model"], now, conv_id))
        db.commit()
    finally:
        db.close()
    return {"status": "ok"}


@app.delete("/api/conversations/{conv_id}")
async def delete_conversation(conv_id: str):
    """Delete one conversation and all of its messages."""
    db = get_db()
    try:
        db.execute("DELETE FROM messages WHERE conversation_id = ?", (conv_id,))
        db.execute("DELETE FROM conversations WHERE id = ?", (conv_id,))
        db.commit()
    finally:
        db.close()
    return {"status": "ok"}


@app.delete("/api/conversations")
async def delete_all_conversations():
    """Delete every conversation and message."""
    db = get_db()
    try:
        db.execute("DELETE FROM messages")
        db.execute("DELETE FROM conversations")
        db.commit()
    finally:
        db.close()
    log.info("Deleted all conversations")
    return {"status": "ok"}


# --- Chat (streaming) ---


def build_system_prompt(db, extra_prompt=""):
    """Build the full system prompt: profile + preset/custom prompt.

    Args:
        db: Open database connection whose rows support access by column
            name. It is not closed here.
        extra_prompt: Preset or custom prompt text; ignored when blank.

    Returns:
        The non-empty parts joined by a ``---`` separator, or "" if the
        profile is disabled/empty and no extra prompt was given.
    """
    parts = []

    # Check if profile is enabled
    settings = {row["key"]: row["value"] for row in db.execute("SELECT key, value FROM settings").fetchall()}
    if settings.get("profile_enabled", "true") == "true":
        profile = db.execute("SELECT content FROM profile WHERE id = 1").fetchone()
        if profile and profile["content"].strip():
            parts.append(profile["content"].strip())

    if extra_prompt and extra_prompt.strip():
        parts.append(extra_prompt.strip())

    return "\n\n---\n\n".join(parts) if parts else ""
def _sse(payload: dict) -> str:
    """Encode one server-sent-event frame carrying ``payload`` as JSON."""
    return f"data: {json.dumps(payload)}\n\n"


async def _iter_ollama_chunks(resp):
    """Yield parsed JSON objects from an Ollama streaming response.

    Blank and undecodable lines are skipped. Iteration stops after the chunk
    flagged ``done`` has been yielded.
    """
    async for line in resp.aiter_lines():
        if not line.strip():
            continue
        try:
            chunk = json.loads(line)
        except json.JSONDecodeError:
            continue
        if not isinstance(chunk, dict):
            continue
        yield chunk
        if chunk.get("done"):
            return


def _save_assistant_message(conv_id: str, content: str) -> None:
    """Persist an assistant reply, always releasing the connection."""
    db = get_db()
    try:
        db.execute(
            "INSERT INTO messages (conversation_id, role, content, created_at) VALUES (?, ?, ?, ?)",
            (conv_id, "assistant", content, datetime.now(timezone.utc).isoformat())
        )
        db.commit()
    finally:
        db.close()


@app.post("/api/chat")
async def chat(request: Request):
    """Stream a chat completion as server-sent events.

    Body fields: ``message`` (required), ``conversation_id`` (a new
    conversation is created when absent), ``model`` and ``system_prompt``.

    Events emitted, in order: ``token`` frames for the first answer; if
    search is enabled and the answer looks uncertain or like a refusal,
    ``searching``, ``search_results`` and ``debug`` frames followed by one
    ``token`` frame flagged ``augmented``; finally a ``done`` frame with
    perplexity and tokens/sec. Failures are reported as an ``error`` frame.

    Raises:
        HTTPException: 400 for an empty message, 404 when
            ``conversation_id`` does not name an existing conversation.
    """
    body = await request.json()
    conv_id = body.get("conversation_id")
    user_message = (body.get("message") or "").strip()
    model = body.get("model", DEFAULT_MODEL)
    preset_prompt = body.get("system_prompt", "")

    if not user_message:
        raise HTTPException(status_code=400, detail="Empty message")

    now = datetime.now(timezone.utc).isoformat()
    db = get_db()
    try:
        # Check if search is enabled
        settings = {row["key"]: row["value"] for row in db.execute("SELECT key, value FROM settings").fetchall()}
        search_enabled = settings.get("search_enabled", "true") == "true"
        log.debug(f"Chat request: model={model}, search_enabled={search_enabled}")

        # Auto-create conversation if needed
        if not conv_id:
            conv_id = str(uuid.uuid4())
            title = user_message[:80] + ("..." if len(user_message) > 80 else "")
            db.execute(
                "INSERT INTO conversations (id, title, model, created_at, updated_at) VALUES (?, ?, ?, ?, ?)",
                (conv_id, title, model, now, now)
            )
        else:
            touched = db.execute("UPDATE conversations SET updated_at = ? WHERE id = ?", (now, conv_id))
            if touched.rowcount == 0:
                # Otherwise the message insert below trips the foreign key
                # and the client gets an opaque 500.
                raise HTTPException(status_code=404, detail="Conversation not found")

        # Save user message
        db.execute(
            "INSERT INTO messages (conversation_id, role, content, created_at) VALUES (?, ?, ?, ?)",
            (conv_id, "user", user_message, now)
        )
        db.commit()

        # Build message history
        history_rows = db.execute(
            "SELECT role, content FROM messages WHERE conversation_id = ? ORDER BY id ASC",
            (conv_id,)
        ).fetchall()

        # Build system prompt (profile + preset)
        system_prompt = build_system_prompt(db, preset_prompt)
    finally:
        db.close()

    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.extend({"role": row["role"], "content": row["content"]} for row in history_rows)

    ollama_payload = {
        "model": model,
        "messages": messages,
        "stream": True,
        "logprobs": True,
    }

    async def stream_response():
        full_response = []
        all_logprobs = []
        tokens_per_sec = 0.0
        stream_timeout = httpx.Timeout(300.0, connect=10.0)
        async with httpx.AsyncClient() as client:
            try:
                async with client.stream(
                    "POST",
                    f"{OLLAMA_BASE}/api/chat",
                    json=ollama_payload,
                    timeout=stream_timeout
                ) as resp:
                    async for chunk in _iter_ollama_chunks(resp):
                        if chunk.get("error"):
                            # e.g. unknown model: report it rather than
                            # saving an empty assistant message.
                            yield _sse({'error': str(chunk["error"])})
                            return
                        message = chunk.get("message")
                        if isinstance(message, dict) and "content" in message:
                            token = message["content"]
                            full_response.append(token)
                            yield _sse({'token': token, 'conversation_id': conv_id})
                        # Collect logprobs
                        if chunk.get("logprobs"):
                            all_logprobs.extend(chunk["logprobs"])
                        if chunk.get("done"):
                            # Capture timing info from final chunk
                            eval_count = chunk.get("eval_count", 0)
                            eval_duration = chunk.get("eval_duration", 0)
                            tokens_per_sec = (eval_count / (eval_duration / 1e9)) if eval_duration > 0 else 0

                # Check for uncertainty and search if needed
                assistant_msg = "".join(full_response)
                perplexity = calculate_perplexity(all_logprobs) if all_logprobs else 0.0
                should_search = is_uncertain(all_logprobs) or is_refusal(assistant_msg)

                if search_enabled and should_search:
                    # Signal that we're searching
                    yield _sse({'searching': True, 'conversation_id': conv_id})

                    # Query SearXNG
                    search_query = extract_search_query(user_message)
                    log.info(f"Extracted search query: '{search_query}'")
                    search_results = await query_searxng(search_query)

                    if search_results:
                        # Build augmented messages - inject search context, DON'T include the refusal
                        search_context = format_search_results(search_results)

                        # Rebuild: system prompt + search context + original user question
                        if system_prompt:
                            augmented_system = system_prompt + "\n\n" + search_context
                        else:
                            augmented_system = search_context
                        augmented_messages = [{"role": "system", "content": augmented_system}]

                        # Add conversation history except the last user message (we'll re-add it)
                        augmented_messages.extend(
                            {"role": row["role"], "content": row["content"]}
                            for row in history_rows[:-1]
                        )

                        # Re-add the user question
                        augmented_messages.append({"role": "user", "content": user_message})

                        augmented_payload = {
                            "model": model,
                            "messages": augmented_messages,
                            "stream": True,
                        }

                        # Signal search results found - include actual results for debug
                        yield _sse({'search_results': len(search_results), 'results_preview': [r['title'] for r in search_results], 'conversation_id': conv_id})

                        # Collect the augmented response; it is sent as one chunk after cleaning
                        yield _sse({'debug': 'Starting augmented response...', 'conversation_id': conv_id})
                        augmented_response = []
                        async with client.stream(
                            "POST",
                            f"{OLLAMA_BASE}/api/chat",
                            json=augmented_payload,
                            timeout=stream_timeout
                        ) as resp2:
                            async for chunk in _iter_ollama_chunks(resp2):
                                message = chunk.get("message")
                                if isinstance(message, dict) and "content" in message:
                                    augmented_response.append(message["content"])

                        # Clean hedging from the response
                        raw_response = "".join(augmented_response)
                        if not raw_response.strip():
                            log.warning("Augmented response empty, falling back to original")
                            raw_response = assistant_msg
                        cleaned_response = clean_hedging(raw_response)
                        log.debug(f"Cleaned hedging: {len(raw_response)} -> {len(cleaned_response)} chars")

                        # If model STILL refuses after getting search data, format answer ourselves
                        if is_refusal(cleaned_response) or len(cleaned_response) < 20:
                            log.warning("Model refused even with search context, formatting direct answer")
                            cleaned_response = format_direct_answer(user_message, search_results)

                        # Send cleaned response as single chunk
                        yield _sse({'token': cleaned_response, 'conversation_id': conv_id, 'augmented': True})

                        # Save the cleaned response
                        search_note = "\n\n---\n*🔍 Enhanced with web search results*"
                        _save_assistant_message(conv_id, cleaned_response + search_note)

                        yield _sse({'done': True, 'conversation_id': conv_id, 'searched': True, 'perplexity': round(perplexity, 2), 'tokens_per_sec': round(tokens_per_sec, 1)})
                        return

                # No search needed - save original response
                _save_assistant_message(conv_id, assistant_msg)
                yield _sse({'done': True, 'conversation_id': conv_id, 'perplexity': round(perplexity, 2), 'tokens_per_sec': round(tokens_per_sec, 1)})

            except httpx.ConnectError:
                yield _sse({'error': 'Cannot connect to Ollama. Is it running?'})
            except Exception as e:
                # Last-resort boundary: the HTTP response has already started,
                # so the only way to report a failure is as an SSE frame.
                yield _sse({'error': str(e)})

    return StreamingResponse(stream_response(), media_type="text/event-stream")
created_at) VALUES (?, ?, ?, ?)", + (conv_id, "assistant", assistant_msg, datetime.now(timezone.utc).isoformat()) + ) + db2.commit() + db2.close() + yield f"data: {json.dumps({'done': True, 'conversation_id': conv_id, 'perplexity': round(perplexity, 2), 'tokens_per_sec': round(tokens_per_sec, 1)})}\n\n" + + except httpx.ConnectError: + yield f"data: {json.dumps({'error': 'Cannot connect to Ollama. Is it running?'})}\n\n" + except Exception as e: + yield f"data: {json.dumps({'error': str(e)})}\n\n" + + return StreamingResponse(stream_response(), media_type="text/event-stream") + +# ===================================================================== +# FRONTEND +# ===================================================================== + +HTML_PAGE = r""" + + + + +JarvisChat + + + + + + + + + + + +
+
+
+ Model + +
+
+ + +
+
+ +
+
+ +

JarvisChat — your local coding companion.
Profile context is injected automatically.
Web search kicks in when the model is uncertain.
Pick a model and start building.

+
+
+ +
+
+ PRESET + +
+
+ +
+
+
-- / --
+
+ +
+
+
+ + + + +""" + +if __name__ == "__main__": + import uvicorn + uvicorn.run(app, host="0.0.0.0", port=8080) diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..5f41323 --- /dev/null +++ b/readme.md @@ -0,0 +1,251 @@ +# ⚡ JarvisChat + +**A lightweight Ollama coding companion that runs on Python 3.13** + +![Version](https://img.shields.io/badge/version-1.3.0-blue) +![Python](https://img.shields.io/badge/python-3.13-green) +![License](https://img.shields.io/badge/license-MIT-orange) + +JarvisChat is a single-file FastAPI application that provides a clean, responsive web interface for Ollama. It features persistent memory, automatic web search when the model is uncertain, and real-time token tracking. + +## Features + +- **Persistent Profile/Memory** — Your context is injected into every conversation automatically +- **System Prompt Presets** — Switch between coding assistant, sysadmin, general, or custom modes +- **Streaming Chat** — Real-time token streaming with conversation history +- **Model Switching** — Hot-swap between all installed Ollama models +- **Web Search Integration** — SearXNG kicks in automatically when the model is uncertain (perplexity-based) +- **Weather Queries** — Direct wttr.in integration for weather questions +- **Token Thermometer** — Visual context usage bar with live updates as you type +- **Perplexity & Speed Badges** — See model confidence (PPL) and tokens/sec on each response +- **Copy-to-Clipboard** — One-click copy on all code blocks +- **Dark Theme** — Easy on the eyes for long coding sessions + +## Architecture + +``` +Browser ◄──► app.py (FastAPI) ◄──► Ollama (LLM) + │ + ▼ (when uncertain) + SearXNG (web search) +``` + +JarvisChat acts as middleware between your browser and Ollama. When the model's perplexity exceeds a threshold (default 15.0) or it refuses to answer, JarvisChat automatically queries SearXNG, injects the results, and re-prompts the model. 
+ +**This is NOT training** — SearXNG is only used at runtime as a fallback for uncertain responses. + +## Requirements + +- Python 3.11+ (tested on 3.13) +- Ollama running locally (default: `localhost:11434`) +- SearXNG (optional, for web search — default: `localhost:8888`) + +## Installation + +```bash +# Clone or download app.py +git clone https://llgit.llamachile.shop/gramps/jarvischat.git +cd jarvischat + +# Install dependencies +pip install fastapi httpx uvicorn + +# Run +python app.py +# or +uvicorn app:app --host 0.0.0.0 --port 8080 +``` + +Open `http://localhost:8080` in your browser. + +## Running as a Service + +**Important:** Although JarvisChat is a single-file Python application, it's designed to run as a persistent service alongside Ollama — not as a one-off script. Both services should start on boot. + +### systemd Service (recommended) + +Create `/etc/systemd/system/jarvischat.service`: + +```ini +[Unit] +Description=JarvisChat - Ollama Web UI +After=network.target ollama.service +Wants=ollama.service + +[Service] +Type=simple +User=jarvischat +WorkingDirectory=/opt/jarvischat +ExecStart=/usr/bin/python3 app.py +Restart=on-failure +RestartSec=5 + +[Install] +WantedBy=multi-user.target +``` + +Then enable and start: + +```bash +sudo systemctl daemon-reload +sudo systemctl enable jarvischat +sudo systemctl start jarvischat +``` + +### Verify Both Services + +```bash +# Check Ollama +systemctl status ollama + +# Check JarvisChat +systemctl status jarvischat + +# View JarvisChat logs +journalctl -t jarvischat -f +``` + +## Configuration + +Edit these constants at the top of `app.py`: + +```python +VERSION = "1.3.0" +OLLAMA_BASE = "http://localhost:11434" +SEARXNG_BASE = "http://localhost:8888" +DEFAULT_MODEL = "deepseek-coder:6.7b" +PERPLEXITY_THRESHOLD = 15.0 # Higher = less likely to trigger search +``` + +## Database + +JarvisChat uses SQLite (`jarvischat.db` in the same directory as `app.py`): + +| Table | Purpose | +|-------|---------| +| 
conversations | Chat sessions with model and timestamps | +| messages | Individual messages with role and content | +| system_presets | Saved system prompt presets | +| profile | Your persistent memory/context | +| settings | App settings (search/profile toggles, default model) | + +## Logging + +JarvisChat logs to the local syslog socket (`/dev/log`); on systemd hosts, journald collects those entries: + +```bash +# Follow live logs +journalctl -t jarvischat -f + +# View last 100 entries +journalctl -t jarvischat -n 100 +``` + +## Token Thermometer + +The vertical bar next to the input shows your context usage in real-time: + +- **Green** — Plenty of room +- **Yellow** — 70%+ used +- **Red** — 90%+ used (approaching limit) + +The count includes: profile + preset + conversation history + current input. Context size is fetched from Ollama when you switch models. + +## Search Flow + +1. User sends message → Ollama streams response with logprobs +2. JarvisChat calculates perplexity from logprobs +3. If perplexity > 15.0 OR refusal patterns detected: + - Yield `{searching: True}` to show spinner + - Query SearXNG (or wttr.in for weather) + - Inject results into context + - Re-prompt Ollama +4. If model still refuses, format raw search results directly +5. Clean hedging phrases from response +6. 
Yield final response with PPL and t/s badges + +## API Endpoints + +| Endpoint | Method | Description | +|----------|--------|-------------| +| `/` | GET | Web UI | +| `/api/models` | GET | List Ollama models | +| `/api/ps` | GET | Running models | +| `/api/show` | POST | Model info (context size) | +| `/api/chat` | POST | Stream chat (SSE) | +| `/api/conversations` | GET | List conversations | +| `/api/conversations/{id}` | GET/DELETE | Get/delete conversation | +| `/api/profile` | GET/PUT | Get/update profile | +| `/api/presets` | GET/POST | List/create presets | +| `/api/presets/{id}` | PUT/DELETE | Update/delete preset | +| `/api/settings` | GET/PUT | App settings | +| `/api/search/status` | GET | SearXNG availability | + +## Screenshots + +*(Add your own screenshot here)* + +## TODO + +### Active + +1. ~~**Mass-delete conversation history**~~ ✓ (v1.3.0) + +2. **Verify SearXNG and Docker services persist across reboots** + - Expand refusal patterns: "As an AI model", "based on my training data", "I don't have the capability" + +3. **Input trigger: `search+` prefix** + - Strip prefix, query SearXNG directly, Ollama summarizes + - Raw results in expandable div (not tooltip) + +4. **Add `profile.example.md`** + - Recommended default profile with anti-bullshit rules (no "As an AI", no OpenAI mentions) + +### Backlog + +5. Conversation search/filter by keyword +6. Export conversation to markdown/text +7. Keyboard shortcuts (Ctrl+N new chat, Ctrl+Enter send) +8. ~~Token count estimate before sending~~ ✓ (v1.2.9) +9. Model info display — context length, VRAM usage from Ollama `/api/ps` +10. Retry button on assistant messages +11. Source links — clickable links when search used +12. Allow conversation renaming +13. Multiple profiles — coding/sysadmin/general +14. Auto-generate conversation tags (client-side KWIC, top 5, filterable badges) +15. **Image input support** + - Pull vision model (llava, llama3.2-vision, etc.) 
+ - Frontend: file input / drag-drop, base64 encode + - Backend: pass `images` array to Ollama `/api/chat` + +## Version History + +| Version | Changes | +|---------|---------| +| 1.3.0 | Delete all conversations button | +| 1.2.9 | Token thermometer with live context tracking | +| 1.2.8 | Logo in sidebar, llama emoji tagline | +| 1.2.7 | Tokens per second (t/s) badge on responses | +| 1.2.6 | wttr.in weather integration, improved search extraction | +| 1.2.5 | SearXNG infoboxes/answers, smarter query building | +| 1.2.4 | Perplexity badges, hedging cleanup | +| 1.2.3 | SearXNG integration with perplexity-based triggering | +| 1.2.0 | System prompt presets, settings persistence | +| 1.1.0 | Profile memory, model switching | +| 1.0.0 | Initial release | + +## License + +MIT + +--- + +## A Note from Gramps + +I named my AI machine "jarvis" after the AI assistant in *Iron Man* (2008) — because it's an awesome name. When I started building a local coding companion to talk to it, "JarvisChat" just made sense. + +This project is in active development. Eventually it'll get packaged up as a Docker thing, but for now while I'm iterating fast, a single-file Python service does the job. + +--- + +*Built with 🦙 by Gramps at the Llama Chile Shop* diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..d84dedb --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +fastapi>=0.115.0 +uvicorn[standard]>=0.32.0 +httpx>=0.27.0