diff --git a/.gitignore b/.gitignore index d2c5cc6..d303506 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ *.py- __pycache__/ venv/ +readme.md- diff --git a/app.py b/app.py index 9e98ad0..300fe03 100644 --- a/app.py +++ b/app.py @@ -6,6 +6,7 @@ Talks to Ollama API on localhost:11434 Features: - Persistent profile/memory injected into every conversation + - FTS5-based memory system for context retrieval - Saved system prompt presets (coding assistant, sysadmin, general, custom) - Streaming chat with conversation history - Model switching between all installed Ollama models @@ -24,11 +25,14 @@ import re from datetime import datetime, timezone from pathlib import Path from contextlib import asynccontextmanager +from typing import Optional import httpx import psutil from fastapi import FastAPI, Request, HTTPException from fastapi.responses import HTMLResponse, StreamingResponse, JSONResponse +from fastapi.staticfiles import StaticFiles +from fastapi.templating import Jinja2Templates # --- Logging Setup --- import logging.handlers @@ -40,18 +44,20 @@ syslog_handler.setFormatter(logging.Formatter('jarvischat[%(process)d]: %(leveln log.addHandler(syslog_handler) # --- Configuration --- -VERSION = "1.3.1" +VERSION = "1.4.0" OLLAMA_BASE = "http://localhost:11434" SEARXNG_BASE = "http://localhost:8888" -DB_PATH = Path(__file__).parent / "jarvischat.db" +BASE_DIR = Path(__file__).parent +DB_PATH = BASE_DIR / "jarvischat.db" DEFAULT_MODEL = "deepseek-coder:6.7b" +# --- Templates and Static Files --- +templates = Jinja2Templates(directory=str(BASE_DIR / "templates")) + # --- Perplexity Threshold --- -# Higher perplexity = model is less confident / more uncertain -# Tune this based on your models. Start conservative (higher threshold). PERPLEXITY_THRESHOLD = 15.0 -# --- Refusal Patterns (fallback for confident refusals) --- +# --- Refusal Patterns --- REFUSAL_PATTERNS = re.compile(r"|".join([ r"i don'?t have (?:real-?time|current|live)", r"i (?:can'?t|cannot) provide (?:current|real-?time|live)", @@ -63,7 +69,7 @@ REFUSAL_PATTERNS = re.compile(r"|".join([ r"(?:check|visit|use) a (?:website|financial|news)", ]), re.IGNORECASE) -# --- Hedging patterns to strip from search-augmented responses --- +# --- Hedging patterns --- HEDGE_PATTERNS = [ r"^I'?m sorry,?\s*but\s*I\s*(?:can'?t|cannot)\s*assist\s*with\s*that[^.]*\.\s*", r"^I'?m sorry,?\s*but[^.]*(?:previous|incorrect)[^.]*\.\s*", @@ -83,21 +89,19 @@ def format_direct_answer(question: str, results: list[dict]) -> str: """Format search results directly when model refuses to help.""" if not results: return "No search results found." - - lines = [f"Here's what I found:\n"] - for r in results[:3]: # Top 3 results + lines = ["Here's what I found:\n"] + for r in results[:3]: lines.append(f"**{r['title']}**") if r['content']: lines.append(f"{r['content']}") lines.append("") - return "\n".join(lines).strip() # --- Default Profile --- DEFAULT_PROFILE = """You are a coding companion running locally on a machine called "jarvis". 
## Environment -- jarvis: Debian 13 (trixie) x86_64, AMD Ryzen 5 5600X, 16GB RAM, AMD RX 6600 XT (8GB VRAM), IP varies +- jarvis: Debian 13 (trixie) x86_64, AMD Ryzen 5 5600X, 16GB RAM, AMD RX 6600 XT (8GB VRAM) - llamadev: Windows 11, primary development machine, IP 192.168.50.108, user "alphaalpaca" - Corsair: Windows 11, gaming/streaming rig - pivault: RPi 5, 8GB RAM, Debian 13, 11TB RAID5 NAS at /mnt/pivault, IP 192.168.50.159 @@ -110,10 +114,8 @@ DEFAULT_PROFILE = """You are a coding companion running locally on a machine cal - Currently learning Rust, transitioning from decades of PHP - Building a WW2 mobile game in Godot Engine for Android - Runs a YouTube series: "Building a Professional Dev Environment with Local AI" -- Working on "Sysadmin's Wizard's Notebook" app concept in Rust - Veteran on fixed income — prefers free/open-source solutions - Home lab enthusiast with Z-Wave and Tapo smart home devices -- Streams Fortnite on a regular schedule ## How to Respond - Be direct and concise — no hand-holding, this user knows what they're doing @@ -126,24 +128,20 @@ DEFAULT_PROFILE = """You are a coding companion running locally on a machine cal # --- Default System Prompt Presets --- DEFAULT_PRESETS = [ - { - "name": "Coding Companion", - "prompt": "You are a senior software engineer and coding companion. Focus on writing clean, efficient, well-documented code. Provide complete working examples. Explain architectural decisions and trade-offs. Prefer Rust, Python, and bash." - }, - { - "name": "Linux Sysadmin", - "prompt": "You are an experienced Linux systems administrator. Focus on command-line solutions, systemd services, networking, storage, and security. Prefer Debian/Ubuntu conventions. Be concise and direct." - }, - { - "name": "General Assistant", - "prompt": "You are a helpful general-purpose assistant. Be clear and concise." - } + {"name": "Coding Companion", "prompt": "You are a senior software engineer and coding companion. Focus on writing clean, efficient, well-documented code. Provide complete working examples. Explain architectural decisions and trade-offs. Prefer Rust, Python, and bash."}, + {"name": "Linux Sysadmin", "prompt": "You are an experienced Linux systems administrator. Focus on command-line solutions, systemd services, networking, storage, and security. Prefer Debian/Ubuntu conventions. Be concise and direct."}, + {"name": "General Assistant", "prompt": "You are a helpful general-purpose assistant. 
Be clear and concise."} ] -# --- Database Setup --- + +# ============================================================================= +# DATABASE +# ============================================================================= + def init_db(): conn = sqlite3.connect(DB_PATH) conn.row_factory = sqlite3.Row + conn.execute(""" CREATE TABLE IF NOT EXISTS conversations ( id TEXT PRIMARY KEY, @@ -185,13 +183,22 @@ def init_db(): value TEXT NOT NULL ) """) + + # FTS5 Memory table + conn.execute(""" + CREATE VIRTUAL TABLE IF NOT EXISTS memories USING fts5( + fact, + topic, + source, + created_at UNINDEXED + ) + """) # Seed default profile if empty existing = conn.execute("SELECT id FROM profile WHERE id = 1").fetchone() if not existing: now = datetime.now(timezone.utc).isoformat() - conn.execute("INSERT INTO profile (id, content, updated_at) VALUES (1, ?, ?)", - (DEFAULT_PROFILE, now)) + conn.execute("INSERT INTO profile (id, content, updated_at) VALUES (1, ?, ?)", (DEFAULT_PROFILE, now)) # Seed default presets if empty existing_presets = conn.execute("SELECT COUNT(*) as c FROM system_presets").fetchone() @@ -204,11 +211,7 @@ def init_db(): ) # Default settings - defaults = { - "profile_enabled": "true", - "default_model": DEFAULT_MODEL, - "search_enabled": "true", - } + defaults = {"profile_enabled": "true", "default_model": DEFAULT_MODEL, "search_enabled": "true", "memory_enabled": "true"} for key, value in defaults.items(): existing = conn.execute("SELECT key FROM settings WHERE key = ?", (key,)).fetchone() if not existing: @@ -223,107 +226,192 @@ def get_db(): conn.execute("PRAGMA foreign_keys = ON") return conn -# --- SearXNG Integration --- + +# ============================================================================= +# MEMORY SYSTEM (FTS5) +# ============================================================================= + +def add_memory(fact: str, topic: str = "general", source: str = "explicit") -> int: + """Store a new memory. Returns rowid.""" + db = get_db() + now = datetime.now(timezone.utc).isoformat() + cur = db.execute( + "INSERT INTO memories (fact, topic, source, created_at) VALUES (?, ?, ?, ?)", + (fact, topic, source, now) + ) + db.commit() + rowid = cur.lastrowid + db.close() + log.info(f"Memory added [{topic}]: {fact[:50]}...") + return rowid + +def search_memories(query: str, limit: int = 5) -> list[dict]: + """Search memories by relevance using FTS5.""" + if not query.strip(): + return [] + db = get_db() + words = [w.strip() for w in query.split() if w.strip()] + if not words: + db.close() + return [] + safe_query = " OR ".join(word + "*" for word in words[:10]) + try: + rows = db.execute(""" + SELECT rowid, fact, topic, source, created_at, bm25(memories) AS rank + FROM memories WHERE memories MATCH ? ORDER BY rank LIMIT ? + """, (safe_query, limit)).fetchall() + results = [dict(row) for row in rows] + log.debug(f"Memory search '{query}' returned {len(results)} results") + except Exception as e: + log.warning(f"Memory search error: {e}") + results = [] + db.close() + return results + +def get_all_memories(topic: Optional[str] = None) -> list[dict]: + """Get all memories, optionally filtered by topic.""" + db = get_db() + if topic: + rows = db.execute("SELECT rowid, * FROM memories WHERE topic = ? 
ORDER BY created_at DESC", (topic,)).fetchall() + else: + rows = db.execute("SELECT rowid, * FROM memories ORDER BY created_at DESC").fetchall() + db.close() + return [dict(row) for row in rows] + +def delete_memory(rowid: int) -> bool: + """Delete a memory by rowid.""" + db = get_db() + cur = db.execute("DELETE FROM memories WHERE rowid = ?", (rowid,)) + db.commit() + deleted = cur.rowcount > 0 + db.close() + if deleted: + log.info(f"Memory deleted: rowid={rowid}") + return deleted + +def update_memory(rowid: int, fact: str) -> bool: + """Update an existing memory's fact.""" + db = get_db() + cur = db.execute("UPDATE memories SET fact = ? WHERE rowid = ?", (fact, rowid)) + db.commit() + updated = cur.rowcount > 0 + db.close() + return updated + +def get_memory_count() -> int: + """Get total number of memories.""" + db = get_db() + count = db.execute("SELECT COUNT(*) as c FROM memories").fetchone()["c"] + db.close() + return count + + +# --- Remember/Forget command processing --- +REMEMBER_PATTERNS = [ + (r"remember that (.+)", "explicit"), + (r"please remember (.+)", "explicit"), + (r"don'?t forget (.+)", "explicit"), + (r"note that (.+)", "explicit"), + (r"keep in mind (?:that )?(.+)", "explicit"), +] + +FORGET_PATTERNS = [ + r"forget (?:that )?(.+)", + r"don'?t remember (.+)", + r"remove (?:the )?memory (?:about |that )?(.+)", +] + +def detect_topic(fact: str) -> str: + """Auto-detect memory topic from content.""" + fact_lower = fact.lower() + if any(w in fact_lower for w in ["prefer", "like", "hate", "always", "never", "favorite"]): + return "preference" + elif any(w in fact_lower for w in ["working on", "building", "project", "developing"]): + return "project" + elif any(w in fact_lower for w in ["run", "install", "server", "ip", "port", "service", "docker", "systemd"]): + return "infrastructure" + elif any(w in fact_lower for w in ["my name", "i am", "i'm a", "i live", "my wife", "my partner"]): + return "personal" + return "general" + +def process_remember_command(user_message: str) -> Optional[str]: + """Check for 'remember/forget' commands. 
Returns confirmation or None.""" + for pattern, source in REMEMBER_PATTERNS: + match = re.search(pattern, user_message, re.IGNORECASE) + if match: + fact = match.group(1).strip().rstrip('.') + topic = detect_topic(fact) + add_memory(fact, topic=topic, source=source) + return f"✓ Remembered [{topic}]: {fact}" + + for pattern in FORGET_PATTERNS: + match = re.search(pattern, user_message, re.IGNORECASE) + if match: + search_term = match.group(1).strip().rstrip('.') + memories = search_memories(search_term, limit=3) + if memories: + for m in memories: + delete_memory(m["rowid"]) + return f"✓ Forgot {len(memories)} memory/memories about: {search_term}" + else: + return f"✗ No memories found about: {search_term}" + return None + + +# ============================================================================= +# SEARXNG INTEGRATION +# ============================================================================= + async def query_searxng(query: str, max_results: int = 5) -> list[dict]: """Query SearXNG and return search results.""" log.info(f"Querying SearXNG: '{query}'") async with httpx.AsyncClient() as client: - # For weather queries, hit wttr.in directly + # Weather shortcut weather_match = re.search(r"(?:weather|temperature|forecast)\s+(?:in\s+)?(.+?)(?:\s+right now|\s+today|\s+degrees)?$", query, re.IGNORECASE) if weather_match or "weather" in query.lower() or "temperature" in query.lower(): location = weather_match.group(1) if weather_match else re.sub(r"(weather|temperature|forecast|right now|today|degrees)", "", query, flags=re.IGNORECASE).strip() if location: try: - log.info(f"Fetching weather for: {location}") - resp = await client.get( - f"https://wttr.in/{location}?format=3", - timeout=10.0, - headers={"User-Agent": "curl/7.68.0"} - ) + resp = await client.get(f"https://wttr.in/{location}?format=3", timeout=10.0, headers={"User-Agent": "curl/7.68.0"}) if resp.status_code == 200: - weather_text = resp.text.strip() - log.info(f"wttr.in returned: {weather_text}") - return [{ - "title": "Current Weather", - "url": f"https://wttr.in/{location}", - "content": weather_text, - }] + return [{"title": "Current Weather", "url": f"https://wttr.in/{location}", "content": resp.text.strip()}] except Exception as e: - log.warning(f"wttr.in error: {e}, falling back to SearXNG") + log.warning(f"wttr.in error: {e}") try: - resp = await client.get( - f"{SEARXNG_BASE}/search", - params={ - "q": query, - "format": "json", - "categories": "general", - }, - timeout=10.0 - ) + resp = await client.get(f"{SEARXNG_BASE}/search", params={"q": query, "format": "json", "categories": "general"}, timeout=10.0) if resp.status_code == 200: data = resp.json() results = [] - - # Check for direct answers/infoboxes first - if data.get("answers"): - for answer in data["answers"]: - results.append({ - "title": "Direct Answer", - "url": "", - "content": answer, - }) - log.info(f"Got direct answer: {answer[:100]}") - - if data.get("infoboxes"): - for box in data["infoboxes"]: - content = box.get("content", "") - if not content and box.get("attributes"): - content = " | ".join([f"{a.get('label','')}: {a.get('value','')}" for a in box["attributes"]]) - results.append({ - "title": box.get("infobox", "Info"), - "url": box.get("urls", [{}])[0].get("url", "") if box.get("urls") else "", - "content": content, - }) - log.info(f"Got infobox: {box.get('infobox', '')}") - - # Then regular results + for answer in data.get("answers", []): + results.append({"title": "Direct Answer", "url": "", "content": answer}) + for box in 
data.get("infoboxes", []): + content = box.get("content", "") + if not content and box.get("attributes"): + content = " | ".join([f"{a.get('label','')}: {a.get('value','')}" for a in box["attributes"]]) + results.append({"title": box.get("infobox", "Info"), "url": box.get("urls", [{}])[0].get("url", "") if box.get("urls") else "", "content": content}) for r in data.get("results", [])[:max_results]: - results.append({ - "title": r.get("title", ""), - "url": r.get("url", ""), - "content": r.get("content", ""), - }) - - log.info(f"SearXNG returned {len(results)} total results") - for i, r in enumerate(results[:5]): - log.debug(f" Result {i+1}: {r['title'][:60]}") + results.append({"title": r.get("title", ""), "url": r.get("url", ""), "content": r.get("content", "")}) + log.info(f"SearXNG returned {len(results)} results") return results - else: - log.warning(f"SearXNG returned status {resp.status_code}") except Exception as e: log.error(f"SearXNG error: {e}") return [] def calculate_perplexity(logprobs: list) -> float: - """Calculate perplexity from logprobs. Higher = less confident.""" if not logprobs: return 0.0 avg_logprob = sum(lp["logprob"] for lp in logprobs) / len(logprobs) - perplexity = math.exp(-avg_logprob) - return perplexity + return math.exp(-avg_logprob) def is_uncertain(logprobs: list, threshold: float = PERPLEXITY_THRESHOLD) -> bool: - """Check if model output indicates uncertainty based on perplexity.""" if not logprobs: - log.debug("No logprobs returned, skipping uncertainty check") return False perplexity = calculate_perplexity(logprobs) log.info(f"Perplexity: {perplexity:.2f} (threshold: {threshold})") return perplexity > threshold def is_refusal(text: str) -> bool: - """Check if model is refusing/admitting it can't help.""" match = REFUSAL_PATTERNS.search(text) if match: log.info(f"Refusal detected: '{match.group()}'") @@ -331,60 +419,81 @@ def is_refusal(text: str) -> bool: return False def format_search_results(results: list[dict]) -> str: - """Format search results as context for the model.""" if not results: return "" - lines = ["[LIVE WEB DATA]\n"] for i, r in enumerate(results, 1): lines.append(f"{i}. {r['title']}") if r['content']: lines.append(f" {r['content']}") lines.append("") - - lines.append("\nAnswer directly using the data above. No apologies. No disclaimers. No \"please verify elsewhere.\" Just answer.") + lines.append("\nAnswer directly using the data above. No apologies. No disclaimers. 
Just answer.") return "\n".join(lines) def extract_search_query(user_message: str) -> str: - """Extract a good search query from the user's message.""" query = user_message.strip() - - # For temperature/weather queries, be more specific if re.search(r"temperature|weather", query, re.IGNORECASE): - query = re.sub(r"^what('?s| is) the ", "", query, flags=re.IGNORECASE) - query = query + " right now degrees" - - # For price queries, be more specific + query = re.sub(r"^what('?s| is) the ", "", query, flags=re.IGNORECASE) + " right now degrees" if re.search(r"price|spot price", query, re.IGNORECASE): - query = re.sub(r"^(what('?s| is)|can you tell me) the ", "", query, flags=re.IGNORECASE) - query = query + " today USD" - - # Remove common question words + query = re.sub(r"^(what('?s| is)|can you tell me) the ", "", query, flags=re.IGNORECASE) + " today USD" query = re.sub(r"^(what|who|where|when|why|how|is|are|can|could|would|should|do|does|did)\s+", "", query, flags=re.IGNORECASE) - # Remove trailing punctuation query = re.sub(r"[?!.]+$", "", query) - # Limit length - if len(query) > 100: - query = query[:100] - return query.strip() or user_message[:100] + return query[:100].strip() or user_message[:100] + + +# ============================================================================= +# GPU STATS +# ============================================================================= + +def get_gpu_stats() -> dict: + """Get AMD GPU stats via rocm-smi.""" + try: + result = subprocess.run(["rocm-smi", "--showuse", "--showmemuse", "--json"], capture_output=True, text=True, timeout=5) + if result.returncode == 0: + data = json.loads(result.stdout) + gpu_info = data.get("card0", {}) + gpu_use = gpu_info.get("GPU use (%)", 0) + vram_use = gpu_info.get("GPU Memory Allocated (VRAM%)", 0) + if isinstance(gpu_use, str): + gpu_use = int(gpu_use.replace("%", "").strip() or 0) + if isinstance(vram_use, str): + vram_use = int(vram_use.replace("%", "").strip() or 0) + return {"gpu_percent": gpu_use, "vram_percent": vram_use, "available": True} + except (subprocess.TimeoutExpired, FileNotFoundError, json.JSONDecodeError): + pass + except Exception as e: + log.warning(f"GPU stats error: {e}") + return {"gpu_percent": 0, "vram_percent": 0, "available": False} + + +# ============================================================================= +# APP LIFECYCLE +# ============================================================================= -# --- App Lifecycle --- @asynccontextmanager async def lifespan(app: FastAPI): log.info(f"JarvisChat v{VERSION} starting up") - log.info(f"Ollama: {OLLAMA_BASE}") - log.info(f"SearXNG: {SEARXNG_BASE}") + log.info(f"Ollama: {OLLAMA_BASE}, SearXNG: {SEARXNG_BASE}") init_db() + log.info(f"Memory system: {get_memory_count()} memories loaded") yield log.info("JarvisChat shutting down") app = FastAPI(title="JarvisChat", lifespan=lifespan) -# --- API Routes --- +# Mount static files +static_dir = BASE_DIR / "static" +if static_dir.exists(): + app.mount("/static", StaticFiles(directory=str(static_dir)), name="static") + + +# ============================================================================= +# API ROUTES +# ============================================================================= @app.get("/", response_class=HTMLResponse) -async def index(): - return HTML_PAGE.replace("{{VERSION}}", VERSION) +async def index(request: Request): + return templates.TemplateResponse("index.html", {"request": request, "version": VERSION}) @app.get("/api/models") async def list_models(): @@ 
-393,7 +502,7 @@ async def list_models(): resp = await client.get(f"{OLLAMA_BASE}/api/tags", timeout=10) return resp.json() except httpx.ConnectError: - raise HTTPException(status_code=502, detail="Cannot connect to Ollama. Is it running?") + raise HTTPException(status_code=502, detail="Cannot connect to Ollama.") @app.get("/api/ps") async def running_models(): @@ -406,7 +515,6 @@ async def running_models(): @app.post("/api/show") async def show_model(request: Request): - """Get model information including context size.""" body = await request.json() async with httpx.AsyncClient() as client: try: @@ -415,75 +523,17 @@ async def show_model(request: Request): except httpx.ConnectError: raise HTTPException(status_code=502, detail="Cannot connect to Ollama.") -# --- Search Status --- @app.get("/api/search/status") async def search_status(): - """Check if SearXNG is available.""" async with httpx.AsyncClient() as client: try: - resp = await client.get(f"{SEARXNG_BASE}/healthz", timeout=5) + resp = await client.get(f"{SEARXNG_BASE}/search", params={"q": "test", "format": "json"}, timeout=5) return {"available": resp.status_code == 200} except: - # Try a simple search as fallback health check - try: - resp = await client.get(f"{SEARXNG_BASE}/search", params={"q": "test", "format": "json"}, timeout=5) - return {"available": resp.status_code == 200} - except: - return {"available": False} - -# --- System Stats --- - -def get_gpu_stats() -> dict: - """Get AMD GPU stats via rocm-smi.""" - try: - result = subprocess.run( - ["rocm-smi", "--showuse", "--showmemuse", "--json"], - capture_output=True, text=True, timeout=5 - ) - if result.returncode == 0: - data = json.loads(result.stdout) - # Parse rocm-smi JSON output - gpu_info = data.get("card0", {}) - gpu_use = gpu_info.get("GPU use (%)", 0) - vram_use = gpu_info.get("GPU Memory Allocated (VRAM%)", 0) - # Handle string or int values - if isinstance(gpu_use, str): - gpu_use = int(gpu_use.replace("%", "").strip() or 0) - if isinstance(vram_use, str): - vram_use = int(vram_use.replace("%", "").strip() or 0) - return {"gpu_percent": gpu_use, "vram_percent": vram_use, "available": True} - except subprocess.TimeoutExpired: - log.warning("rocm-smi timed out") - except FileNotFoundError: - log.debug("rocm-smi not found") - except json.JSONDecodeError: - # Fallback: parse text output - try: - result = subprocess.run( - ["rocm-smi", "--showuse", "--showmemuse"], - capture_output=True, text=True, timeout=5 - ) - gpu_use = 0 - vram_use = 0 - for line in result.stdout.split("\n"): - if "GPU use (%)" in line: - match = re.search(r"(\d+)", line.split(":")[-1]) - if match: - gpu_use = int(match.group(1)) - elif "GPU Memory Allocated (VRAM%)" in line: - match = re.search(r"(\d+)", line.split(":")[-1]) - if match: - vram_use = int(match.group(1)) - return {"gpu_percent": gpu_use, "vram_percent": vram_use, "available": True} - except Exception as e: - log.warning(f"rocm-smi parse error: {e}") - except Exception as e: - log.warning(f"GPU stats error: {e}") - return {"gpu_percent": 0, "vram_percent": 0, "available": False} + return {"available": False} @app.get("/api/stats") async def system_stats(): - """Get system resource usage (CPU, memory, GPU).""" cpu_percent = psutil.cpu_percent(interval=0.1) memory = psutil.virtual_memory() gpu = get_gpu_stats() @@ -497,6 +547,47 @@ async def system_stats(): "gpu_available": gpu["available"], } + +# --- Memory API --- + +@app.get("/api/memories") +async def list_memories(topic: Optional[str] = None): + memories = 
get_all_memories(topic) + return {"memories": memories, "count": len(memories)} + +@app.post("/api/memories") +async def create_memory(request: Request): + body = await request.json() + rowid = add_memory(fact=body["fact"], topic=body.get("topic", "general"), source=body.get("source", "manual")) + return {"rowid": rowid, "status": "ok"} + +@app.delete("/api/memories/{rowid}") +async def remove_memory(rowid: int): + if not delete_memory(rowid): + raise HTTPException(status_code=404, detail="Memory not found") + return {"status": "ok"} + +@app.put("/api/memories/{rowid}") +async def edit_memory(rowid: int, request: Request): + body = await request.json() + if not update_memory(rowid, body["fact"]): + raise HTTPException(status_code=404, detail="Memory not found") + return {"status": "ok"} + +@app.get("/api/memories/search") +async def search_memories_api(q: str, limit: int = 10): + results = search_memories(q, limit=limit) + return {"results": results, "count": len(results)} + +@app.get("/api/memories/stats") +async def memory_stats(): + db = get_db() + total = db.execute("SELECT COUNT(*) as c FROM memories").fetchone()["c"] + topics = db.execute("SELECT topic, COUNT(*) as c FROM memories GROUP BY topic ORDER BY c DESC").fetchall() + db.close() + return {"total": total, "by_topic": {row["topic"]: row["c"] for row in topics}} + + # --- Profile --- @app.get("/api/profile") @@ -504,17 +595,14 @@ async def get_profile(): db = get_db() row = db.execute("SELECT content, updated_at FROM profile WHERE id = 1").fetchone() db.close() - if row: - return {"content": row["content"], "updated_at": row["updated_at"]} - return {"content": "", "updated_at": ""} + return {"content": row["content"], "updated_at": row["updated_at"]} if row else {"content": "", "updated_at": ""} @app.put("/api/profile") async def update_profile(request: Request): body = await request.json() now = datetime.now(timezone.utc).isoformat() db = get_db() - db.execute("UPDATE profile SET content = ?, updated_at = ? WHERE id = 1", - (body["content"], now)) + db.execute("UPDATE profile SET content = ?, updated_at = ? WHERE id = 1", (body["content"], now)) db.commit() db.close() return {"status": "ok", "updated_at": now} @@ -523,6 +611,7 @@ async def update_profile(request: Request): async def get_default_profile(): return {"content": DEFAULT_PROFILE} + # --- Settings --- @app.get("/api/settings") @@ -542,6 +631,7 @@ async def update_settings(request: Request): db.close() return {"status": "ok"} + # --- System Presets --- @app.get("/api/presets") @@ -557,10 +647,8 @@ async def create_preset(request: Request): preset_id = str(uuid.uuid4()) now = datetime.now(timezone.utc).isoformat() db = get_db() - db.execute( - "INSERT INTO system_presets (id, name, prompt, is_default, created_at) VALUES (?, ?, ?, 0, ?)", - (preset_id, body["name"], body["prompt"], now) - ) + db.execute("INSERT INTO system_presets (id, name, prompt, is_default, created_at) VALUES (?, ?, ?, 0, ?)", + (preset_id, body["name"], body["prompt"], now)) db.commit() db.close() return {"id": preset_id, "name": body["name"], "prompt": body["prompt"]} @@ -569,8 +657,7 @@ async def create_preset(request: Request): async def update_preset(preset_id: str, request: Request): body = await request.json() db = get_db() - db.execute("UPDATE system_presets SET name = ?, prompt = ? WHERE id = ?", - (body["name"], body["prompt"], preset_id)) + db.execute("UPDATE system_presets SET name = ?, prompt = ? 
WHERE id = ?", (body["name"], body["prompt"], preset_id)) db.commit() db.close() return {"status": "ok"} @@ -583,7 +670,8 @@ async def delete_preset(preset_id: str): db.close() return {"status": "ok"} -# --- Conversation CRUD --- + +# --- Conversations --- @app.get("/api/conversations") async def list_conversations(): @@ -600,10 +688,8 @@ async def create_conversation(request: Request): model = body.get("model", DEFAULT_MODEL) title = body.get("title", "New Chat") db = get_db() - db.execute( - "INSERT INTO conversations (id, title, model, created_at, updated_at) VALUES (?, ?, ?, ?, ?)", - (conv_id, title, model, now, now) - ) + db.execute("INSERT INTO conversations (id, title, model, created_at, updated_at) VALUES (?, ?, ?, ?, ?)", + (conv_id, title, model, now, now)) db.commit() db.close() return {"id": conv_id, "title": title, "model": model, "created_at": now, "updated_at": now} @@ -615,9 +701,7 @@ async def get_conversation(conv_id: str): if not conv: db.close() raise HTTPException(status_code=404, detail="Conversation not found") - messages = db.execute( - "SELECT * FROM messages WHERE conversation_id = ? ORDER BY id ASC", (conv_id,) - ).fetchall() + messages = db.execute("SELECT * FROM messages WHERE conversation_id = ? ORDER BY id ASC", (conv_id,)).fetchall() db.close() return {"conversation": dict(conv), "messages": [dict(m) for m in messages]} @@ -627,11 +711,9 @@ async def update_conversation(conv_id: str, request: Request): db = get_db() now = datetime.now(timezone.utc).isoformat() if "title" in body: - db.execute("UPDATE conversations SET title = ?, updated_at = ? WHERE id = ?", - (body["title"], now, conv_id)) + db.execute("UPDATE conversations SET title = ?, updated_at = ? WHERE id = ?", (body["title"], now, conv_id)) if "model" in body: - db.execute("UPDATE conversations SET model = ?, updated_at = ? WHERE id = ?", - (body["model"], now, conv_id)) + db.execute("UPDATE conversations SET model = ?, updated_at = ? 
WHERE id = ?", (body["model"], now, conv_id)) db.commit() db.close() return {"status": "ok"} @@ -655,24 +737,34 @@ async def delete_all_conversations(): log.info("Deleted all conversations") return {"status": "ok"} -# --- Chat (streaming) --- -def build_system_prompt(db, extra_prompt=""): - """Build the full system prompt: profile + preset/custom prompt""" +# ============================================================================= +# CHAT (STREAMING) +# ============================================================================= + +def build_system_prompt(db, extra_prompt="", user_message=""): + """Build the full system prompt: profile + memories + preset.""" parts = [] - - # Check if profile is enabled settings = {row["key"]: row["value"] for row in db.execute("SELECT key, value FROM settings").fetchall()} + if settings.get("profile_enabled", "true") == "true": profile = db.execute("SELECT content FROM profile WHERE id = 1").fetchone() if profile and profile["content"].strip(): parts.append(profile["content"].strip()) + if settings.get("memory_enabled", "true") == "true" and user_message: + memories = search_memories(user_message, limit=5) + if memories: + memory_lines = [f"- {m['fact']}" for m in memories] + parts.append("## Relevant Context from Memory\n" + "\n".join(memory_lines)) + log.debug(f"Injected {len(memories)} memories into context") + if extra_prompt and extra_prompt.strip(): parts.append(extra_prompt.strip()) return "\n\n---\n\n".join(parts) if parts else "" + @app.post("/api/chat") async def chat(request: Request): body = await request.json() @@ -686,38 +778,25 @@ async def chat(request: Request): db = get_db() now = datetime.now(timezone.utc).isoformat() - - # Check if search is enabled settings = {row["key"]: row["value"] for row in db.execute("SELECT key, value FROM settings").fetchall()} search_enabled = settings.get("search_enabled", "true") == "true" - log.debug(f"Chat request: model={model}, search_enabled={search_enabled}") - # Auto-create conversation if needed + remember_response = process_remember_command(user_message) + if not conv_id: conv_id = str(uuid.uuid4()) title = user_message[:80] + ("..." if len(user_message) > 80 else "") - db.execute( - "INSERT INTO conversations (id, title, model, created_at, updated_at) VALUES (?, ?, ?, ?, ?)", - (conv_id, title, model, now, now) - ) + db.execute("INSERT INTO conversations (id, title, model, created_at, updated_at) VALUES (?, ?, ?, ?, ?)", + (conv_id, title, model, now, now)) else: db.execute("UPDATE conversations SET updated_at = ? WHERE id = ?", (now, conv_id)) - # Save user message - db.execute( - "INSERT INTO messages (conversation_id, role, content, created_at) VALUES (?, ?, ?, ?)", - (conv_id, "user", user_message, now) - ) + db.execute("INSERT INTO messages (conversation_id, role, content, created_at) VALUES (?, ?, ?, ?)", + (conv_id, "user", user_message, now)) db.commit() - # Build message history - history_rows = db.execute( - "SELECT role, content FROM messages WHERE conversation_id = ? ORDER BY id ASC", - (conv_id,) - ).fetchall() - - # Build system prompt (profile + preset) - system_prompt = build_system_prompt(db, preset_prompt) + history_rows = db.execute("SELECT role, content FROM messages WHERE conversation_id = ? 
ORDER BY id ASC", (conv_id,)).fetchall() + system_prompt = build_system_prompt(db, preset_prompt, user_message) db.close() messages = [] @@ -726,25 +805,20 @@ async def chat(request: Request): for row in history_rows: messages.append({"role": row["role"], "content": row["content"]}) - ollama_payload = { - "model": model, - "messages": messages, - "stream": True, - "logprobs": True, - } + ollama_payload = {"model": model, "messages": messages, "stream": True, "logprobs": True} async def stream_response(): full_response = [] all_logprobs = [] tokens_per_sec = 0.0 + + if remember_response: + yield f"data: {json.dumps({'token': remember_response + chr(10) + chr(10), 'conversation_id': conv_id})}\n\n" + async with httpx.AsyncClient() as client: try: - async with client.stream( - "POST", - f"{OLLAMA_BASE}/api/chat", - json=ollama_payload, - timeout=httpx.Timeout(300.0, connect=10.0) - ) as resp: + async with client.stream("POST", f"{OLLAMA_BASE}/api/chat", json=ollama_payload, + timeout=httpx.Timeout(300.0, connect=10.0)) as resp: async for line in resp.aiter_lines(): if line.strip(): try: @@ -753,11 +827,9 @@ async def chat(request: Request): token = chunk["message"]["content"] full_response.append(token) yield f"data: {json.dumps({'token': token, 'conversation_id': conv_id})}\n\n" - # Collect logprobs if "logprobs" in chunk and chunk["logprobs"]: all_logprobs.extend(chunk["logprobs"]) if chunk.get("done"): - # Capture timing info from final chunk eval_count = chunk.get("eval_count", 0) eval_duration = chunk.get("eval_duration", 0) tokens_per_sec = (eval_count / (eval_duration / 1e9)) if eval_duration > 0 else 0 @@ -765,109 +837,75 @@ async def chat(request: Request): except json.JSONDecodeError: pass - # Check for uncertainty and search if needed assistant_msg = "".join(full_response) perplexity = calculate_perplexity(all_logprobs) if all_logprobs else 0.0 should_search = is_uncertain(all_logprobs) or is_refusal(assistant_msg) - + if search_enabled and should_search: - # Signal that we're searching yield f"data: {json.dumps({'searching': True, 'conversation_id': conv_id})}\n\n" - - # Query SearXNG search_query = extract_search_query(user_message) - log.info(f"Extracted search query: '{search_query}'") search_results = await query_searxng(search_query) - + if search_results: - # Build augmented messages - inject search context, DON'T include the refusal search_context = format_search_results(search_results) - - # Rebuild: system prompt + search context + original user question augmented_messages = [] if system_prompt: augmented_messages.append({"role": "system", "content": system_prompt + "\n\n" + search_context}) else: augmented_messages.append({"role": "system", "content": search_context}) - - # Add conversation history except the last user message (we'll re-add it) for row in history_rows[:-1]: augmented_messages.append({"role": row["role"], "content": row["content"]}) - - # Re-add the user question augmented_messages.append({"role": "user", "content": user_message}) - - augmented_payload = { - "model": model, - "messages": augmented_messages, - "stream": True, - } - - # Signal search results found - include actual results for debug - yield f"data: {json.dumps({'search_results': len(search_results), 'results_preview': [r['title'] for r in search_results], 'conversation_id': conv_id})}\n\n" - - # Stream the augmented response - yield f"data: {json.dumps({'debug': 'Starting augmented response...', 'conversation_id': conv_id})}\n\n" + + yield f"data: {json.dumps({'search_results': 
len(search_results), 'conversation_id': conv_id})}\n\n" + augmented_response = [] - async with client.stream( - "POST", - f"{OLLAMA_BASE}/api/chat", - json=augmented_payload, - timeout=httpx.Timeout(300.0, connect=10.0) - ) as resp2: + async with client.stream("POST", f"{OLLAMA_BASE}/api/chat", + json={"model": model, "messages": augmented_messages, "stream": True}, + timeout=httpx.Timeout(300.0, connect=10.0)) as resp2: async for line in resp2.aiter_lines(): if line.strip(): try: chunk = json.loads(line) if "message" in chunk and "content" in chunk["message"]: - token = chunk["message"]["content"] - augmented_response.append(token) + augmented_response.append(chunk["message"]["content"]) if chunk.get("done"): break except json.JSONDecodeError: pass - - # Clean hedging from the response - raw_response = "".join(augmented_response) - if not raw_response.strip(): - log.warning("Augmented response empty, falling back to original") - raw_response = assistant_msg + + raw_response = "".join(augmented_response) or assistant_msg cleaned_response = clean_hedging(raw_response) - log.debug(f"Cleaned hedging: {len(raw_response)} -> {len(cleaned_response)} chars") - - # If model STILL refuses after getting search data, format answer ourselves if is_refusal(cleaned_response) or len(cleaned_response) < 20: - log.warning("Model refused even with search context, formatting direct answer") cleaned_response = format_direct_answer(user_message, search_results) - - # Send cleaned response as single chunk + yield f"data: {json.dumps({'token': cleaned_response, 'conversation_id': conv_id, 'augmented': True})}\n\n" - - # Save the cleaned response - search_note = "\n\n---\n*🔍 Enhanced with web search results*" - saved_msg = cleaned_response + search_note - + + saved_msg = cleaned_response + "\n\n---\n*🔍 Enhanced with web search results*" + if remember_response: + saved_msg = remember_response + "\n\n" + saved_msg + db2 = get_db() - db2.execute( - "INSERT INTO messages (conversation_id, role, content, created_at) VALUES (?, ?, ?, ?)", - (conv_id, "assistant", saved_msg, datetime.now(timezone.utc).isoformat()) - ) + db2.execute("INSERT INTO messages (conversation_id, role, content, created_at) VALUES (?, ?, ?, ?)", + (conv_id, "assistant", saved_msg, datetime.now(timezone.utc).isoformat())) db2.commit() db2.close() - + yield f"data: {json.dumps({'done': True, 'conversation_id': conv_id, 'searched': True, 'perplexity': round(perplexity, 2), 'tokens_per_sec': round(tokens_per_sec, 1)})}\n\n" return - - # No search needed - save original response + + saved_msg = assistant_msg + if remember_response: + saved_msg = remember_response + "\n\n" + saved_msg + db2 = get_db() - db2.execute( - "INSERT INTO messages (conversation_id, role, content, created_at) VALUES (?, ?, ?, ?)", - (conv_id, "assistant", assistant_msg, datetime.now(timezone.utc).isoformat()) - ) + db2.execute("INSERT INTO messages (conversation_id, role, content, created_at) VALUES (?, ?, ?, ?)", + (conv_id, "assistant", saved_msg, datetime.now(timezone.utc).isoformat())) db2.commit() db2.close() + yield f"data: {json.dumps({'done': True, 'conversation_id': conv_id, 'perplexity': round(perplexity, 2), 'tokens_per_sec': round(tokens_per_sec, 1)})}\n\n" - + except httpx.ConnectError: yield f"data: {json.dumps({'error': 'Cannot connect to Ollama. 
Is it running?'})}\n\n"
         except Exception as e:
@@ -875,976 +913,7 @@ async def chat(request: Request):
     return StreamingResponse(stream_response(), media_type="text/event-stream")
 
-# =====================================================================
-# FRONTEND
-# =====================================================================
-
-HTML_PAGE = r"""
-[… ~970 lines of inline HTML/CSS/JS elided: dark-theme styles, topbar (model selector), preset picker, token meter ("-- / --"), chat pane, and the welcome tagline ("JarvisChat — your local coding companion. / Profile context is injected automatically. / Web search kicks in when the model is uncertain. / Pick a model and start building."); this markup moves to templates/index.html …]
-"""
 
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8080)
+    uvicorn.run(app, host="0.0.0.0", port=8080)
diff --git a/readme.md b/readme.md
index 8cac20b..0b99d80 100644
--- a/readme.md
+++ b/readme.md
@@ -1,263 +1,89 @@
-# ⚡ JarvisChat
-
-**A lightweight Ollama coding companion that runs on Python 3.13**
-
-![Version](https://img.shields.io/badge/version-1.3.1-blue)
-![Python](https://img.shields.io/badge/python-3.13-green)
-![License](https://img.shields.io/badge/license-MIT-orange)
-
-JarvisChat is a single-file FastAPI application that provides a clean, responsive web interface for Ollama. It features persistent memory, automatic web search when the model is uncertain, and real-time token tracking.
+# JarvisChat v1.4.0
+
+Lightweight Ollama coding companion with an FTS5-backed memory system.
+
+## New in v1.4.0
+- **FTS5 Memory System**: Say "remember that..." to store facts; they're retrieved by relevance and injected automatically
+- **Forget command**: Say "forget about..." to remove memories
+- **Memory toggle**: Enable/disable memory injection from the topbar
+- **Refactored structure**: Frontend separated from the backend for maintainability
 
-## Features
-
-- **Persistent Profile/Memory** — Your context is injected into every conversation automatically
-- **System Prompt Presets** — Switch between coding assistant, sysadmin, general, or custom modes
-- **Streaming Chat** — Real-time token streaming with conversation history
-- **Model Switching** — Hot-swap between all installed Ollama models
-- **Web Search Integration** — SearXNG kicks in automatically when the model is uncertain (perplexity-based)
-- **Weather Queries** — Direct wttr.in integration for weather questions
-- **Token Thermometer** — Visual context usage bar with live updates as you type
-- **Perplexity & Speed Badges** — See model confidence (PPL) and tokens/sec on each response
-- **Copy-to-Clipboard** — One-click copy on all code blocks
-- **Dark Theme** — Easy on the eyes for long coding sessions
-
-## Architecture
-
-```
-Browser ◄──► app.py (FastAPI) ◄──► Ollama (LLM)
-                   │
-                   ▼ (when uncertain)
-              SearXNG (web search)
-```
-
-JarvisChat acts as middleware between your browser and Ollama. When the model's perplexity exceeds a threshold (default 15.0) or it refuses to answer, JarvisChat automatically queries SearXNG, injects the results, and re-prompts the model.
-
-**This is NOT training** — SearXNG is only used at runtime as a fallback for uncertain responses.
-
-## Requirements
-
-- Python 3.11+ (tested on 3.13)
-- Ollama running locally (default: `localhost:11434`)
-- SearXNG (optional, for web search — default: `localhost:8888`)
-- ROCm (optional, for AMD GPU stats — `rocm-smi` must be in PATH)
+## File Structure
+
+```
+/opt/jarvischat/
+├── app.py              # FastAPI backend (~600 lines)
+├── jarvischat.db       # SQLite database (auto-created)
+├── static/
+│   └── logo.jpg        # Your logo (optional)
+└── templates/
+    └── index.html      # Frontend
+```
 
 ## Installation
 
 ```bash
-# Clone or download app.py
-git clone https://github.com/llamachileshop-code/313_webui.git
-cd 313_webui
+# Back up the existing install
+cd /opt/jarvischat
+cp app.py app.py.bak
 
-# Create virtual environment (recommended)
-python3 -m venv venv
-source venv/bin/activate
+# Create directories
+mkdir -p templates static
 
-# Install dependencies
-pip install fastapi httpx uvicorn psutil
+# Copy the new files (from wherever you downloaded them)
+cp /path/to/new/app.py .
+cp /path/to/new/templates/index.html templates/
 
-# Run
-python app.py
-# or
-uvicorn app:app --host 0.0.0.0 --port 8080
+# Extract the logo from the old app.py if you want (or just skip it;
+# the frontend hides a missing logo with onerror="this.style.display='none'")
+
+# Restart the service
+sudo systemctl restart jarvischat
 ```
 
-Open `http://localhost:8080` in your browser.
-
-**Note:** If running as a systemd service with a venv, install dependencies using the venv pip directly:
-```bash
-/opt/jarvischat/venv/bin/pip install fastapi httpx uvicorn psutil
-```
+## Memory Commands
+
+In chat, you can say:
+- "remember that I prefer Rust over Go" → stored as [preference]
+- "remember that JarvisChat runs on port 8080" → stored as [infrastructure]
+- "note that the deadline is Friday" → stored as [general]
+- "forget about the deadline" → removes matching memories
 
-## Running as a Service
-
-**Important:** Although JarvisChat is a single-file Python application, it's designed to run as a persistent service alongside Ollama — not as a one-off script. Both services should start on boot.
-
-### systemd Service (recommended)
-
-Create `/etc/systemd/system/jarvischat.service`:
-
-```ini
-[Unit]
-Description=JarvisChat - Ollama Web UI
-After=network.target ollama.service
-Wants=ollama.service
-
-[Service]
-Type=simple
-User=your-username
-WorkingDirectory=/path/to/313_webui
-ExecStart=/usr/bin/python3 app.py
-Restart=on-failure
-RestartSec=5
-
-[Install]
-WantedBy=multi-user.target
-```
-
-Then enable and start:
-
-```bash
-sudo systemctl daemon-reload
-sudo systemctl enable jarvischat
-sudo systemctl start jarvischat
-```
-
-### Verify Both Services
-
-```bash
-# Check Ollama
-systemctl status ollama
-
-# Check JarvisChat
-systemctl status jarvischat
-
-# View JarvisChat logs
-journalctl -t jarvischat -f
-```
-
-## Configuration
-
-Edit these constants at the top of `app.py`:
-
-```python
-VERSION = "1.3.1"
-OLLAMA_BASE = "http://localhost:11434"
-SEARXNG_BASE = "http://localhost:8888"
-DEFAULT_MODEL = "deepseek-coder:6.7b"
-PERPLEXITY_THRESHOLD = 15.0  # Higher = less likely to trigger search
-```
-
-## Database
-
-JarvisChat uses SQLite (`jarvischat.db` in the same directory as `app.py`):
-
-| Table | Purpose |
-|-------|---------|
-| conversations | Chat sessions with model and timestamps |
-| messages | Individual messages with role and content |
-| system_presets | Saved system prompt presets |
-| profile | Your persistent memory/context |
-| settings | App settings (search/profile toggles, default model) |
-
-## Logging
-
-JarvisChat logs to syslog via journald:
-
-```bash
-# Follow live logs
-journalctl -t jarvischat -f
-
-# View last 100 entries
-journalctl -t jarvischat -n 100
-```
-
-## Token Thermometer
-
-The vertical bar next to the input shows your context usage in real-time:
-
-- **Green** — Plenty of room
-- **Yellow** — 70%+ used
-- **Red** — 90%+ used (approaching limit)
-
-The count includes: profile + preset + conversation history + current input. Context size is fetched from Ollama when you switch models.
-
-## Search Flow
-
-1. User sends message → Ollama streams response with logprobs
-2. JarvisChat calculates perplexity from logprobs
-3. If perplexity > 15.0 OR refusal patterns detected:
-   - Yield `{searching: True}` to show spinner
-   - Query SearXNG (or wttr.in for weather)
-   - Inject results into context
-   - Re-prompt Ollama
-4. If model still refuses, format raw search results directly
-5. Clean hedging phrases from response
-6. Yield final response with PPL and t/s badges
 
+Memories are automatically searched and injected based on your message content.
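+
+A sketch of what the model actually sees: matching facts are appended to the
+system prompt as a plain markdown block (this format comes straight from
+`build_system_prompt()` in `app.py`):
+
+```
+## Relevant Context from Memory
+- I prefer Rust over Go
+- JarvisChat runs on port 8080
+```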
 
 ## API Endpoints
 
-| Endpoint | Method | Description |
-|----------|--------|-------------|
-| `/` | GET | Web UI |
-| `/api/models` | GET | List Ollama models |
-| `/api/ps` | GET | Running models |
-| `/api/show` | POST | Model info (context size) |
-| `/api/stats` | GET | System stats (CPU, memory, GPU, VRAM) |
-| `/api/chat` | POST | Stream chat (SSE) |
-| `/api/conversations` | GET/DELETE | List/delete all conversations |
-| `/api/conversations/{id}` | GET/DELETE | Get/delete conversation |
-| `/api/profile` | GET/PUT | Get/update profile |
-| `/api/presets` | GET/POST | List/create presets |
-| `/api/presets/{id}` | PUT/DELETE | Update/delete preset |
-| `/api/settings` | GET/PUT | App settings |
-| `/api/search/status` | GET | SearXNG availability |
+### Memory
+- `GET /api/memories` - List all memories
+- `POST /api/memories` - Add memory `{"fact": "...", "topic": "general"}`
+- `PUT /api/memories/{rowid}` - Update a memory `{"fact": "..."}`
+- `DELETE /api/memories/{rowid}` - Delete memory
+- `GET /api/memories/search?q=rust` - Search memories
+- `GET /api/memories/stats` - Get counts by topic
 
-## Screenshots
-
-*(Add your own screenshot here)*
+### Existing
+- `GET /api/models` - List Ollama models
+- `POST /api/chat` - Send message (streaming)
+- `GET /api/profile` - Get profile
+- `PUT /api/settings` - Update settings
 
-## TODO
-
-### Active
-
-1. ~~**Mass-delete conversation history**~~ ✓ (v1.3.0)
-
-2. **Verify SearXNG and Docker services persist across reboots**
-   - Expand refusal patterns: "As an AI model", "based on my training data", "I don't have the capability"
-
-3. **Input trigger: `search+` prefix**
-   - Strip prefix, query SearXNG directly, Ollama summarizes
-   - Raw results in expandable div (not tooltip)
-
-4. **Add `profile.example.md`**
-   - Recommended default profile with anti-bullshit rules (no "As an AI", no OpenAI mentions)
+## Dependencies
+
+```bash
+pip install fastapi uvicorn httpx psutil jinja2 python-multipart --break-system-packages
+```
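+
+If you'd rather not use `--break-system-packages` on the system Python, the
+venv route from earlier releases still works; install there and point the
+service's ExecStart at the venv interpreter:
+
+```bash
+python3 -m venv /opt/jarvischat/venv
+/opt/jarvischat/venv/bin/pip install fastapi uvicorn httpx psutil jinja2 python-multipart
+```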
 
+## Testing Memory
+
+```bash
+# Add a memory via API
+curl -X POST http://jarvis:8080/api/memories \
+  -H "Content-Type: application/json" \
+  -d '{"fact": "User prefers native installs over Docker", "topic": "preference"}'
+
+# Search memories
+curl "http://jarvis:8080/api/memories/search?q=docker"
+
+# Or in chat, just say:
+# "remember that I hate yaml"
+# Then ask: "what markup languages should I avoid?"
+```
 
-### Backlog
-
-5. Conversation search/filter by keyword
-6. Export conversation to markdown/text
-7. Keyboard shortcuts (Ctrl+N new chat, Ctrl+Enter send)
-8. ~~Token count estimate before sending~~ ✓ (v1.2.9)
-9. Model info display — context length, VRAM usage from Ollama `/api/ps`
-10. Retry button on assistant messages
-11. Source links — clickable links when search used
-12. Allow conversation renaming
-13. Multiple profiles — coding/sysadmin/general
-14. Auto-generate conversation tags (client-side KWIC, top 5, filterable badges)
-15. **Image input support**
-    - Pull vision model (llava, llama3.2-vision, etc.)
-    - Frontend: file input / drag-drop, base64 encode
-    - Backend: pass `images` array to Ollama `/api/chat`
-
-## Version History
-
-| Version | Changes |
-|---------|---------|
-| 1.3.1 | System stats panel (CPU, memory, GPU, VRAM) in sidebar |
-| 1.3.0 | Delete all conversations button |
-| 1.2.9 | Token thermometer with live context tracking |
-| 1.2.8 | Logo in sidebar, llama emoji tagline |
-| 1.2.7 | Tokens per second (t/s) badge on responses |
-| 1.2.6 | wttr.in weather integration, improved search extraction |
-| 1.2.5 | SearXNG infoboxes/answers, smarter query building |
-| 1.2.4 | Perplexity badges, hedging cleanup |
-| 1.2.3 | SearXNG integration with perplexity-based triggering |
-| 1.2.0 | System prompt presets, settings persistence |
-| 1.1.0 | Profile memory, model switching |
-| 1.0.0 | Initial release |
-
-## License
-
-MIT
-
----
-
-## A Note from Gramps
-
-I named my AI machine "jarvis" after the AI assistant in *Iron Man* (2008) — because it's an awesome name. When I started building a local coding companion to talk to it, "JarvisChat" just made sense.
-
-This project is in active development. Eventually it'll get packaged up as a Docker thing, but for now while I'm iterating fast, a single-file Python service does the job.
-
----
-
-*Built with 🦙 by Gramps at the Llama Chile Shop*
diff --git a/templates/index.html b/templates/index.html
new file mode 100644
index 0000000..1afd0ed
--- /dev/null
+++ b/templates/index.html
@@ -0,0 +1,784 @@
+
+
+ Model + +
+
+ + + +
+
+
+
+ +

JarvisChat — your local coding companion.
Profile + Memory context injected automatically.
Web search kicks in when the model is uncertain.
Say "remember that..." to teach me things.

+
+
+
+
+ PRESET + +
+
+ +
+
+
-- / --
+
+ +
+
+
+ + + +