From 81319f83d489bb060f4833545c060336f65296c1 Mon Sep 17 00:00:00 2001 From: gramps Date: Mon, 27 Apr 2026 10:09:53 -0700 Subject: [PATCH] feat(auth): add guest/admin PIN security model and hardening --- app.py | 984 ++++++++++++++++-- ...opilot-context-loss-incident-2026-04-21.md | 51 + docs/wiki/current-wip.md | 83 ++ readme.md | 32 + templates/index.html | 311 +++++- tests/test_auth_capabilities.py | 78 ++ 6 files changed, 1394 insertions(+), 145 deletions(-) create mode 100644 docs/copilot-context-loss-incident-2026-04-21.md create mode 100644 docs/wiki/current-wip.md create mode 100644 tests/test_auth_capabilities.py diff --git a/app.py b/app.py index 9546864..60b6b2a 100644 --- a/app.py +++ b/app.py @@ -19,19 +19,25 @@ Features: import json import logging import math +import os import sqlite3 import subprocess +import hashlib +import hmac +import time import uuid import re +from threading import Lock from datetime import datetime, timezone from pathlib import Path from contextlib import asynccontextmanager from typing import Optional +from urllib.parse import urlparse import httpx import psutil from fastapi import FastAPI, Request, HTTPException -from fastapi.responses import HTMLResponse, StreamingResponse +from fastapi.responses import HTMLResponse, StreamingResponse, JSONResponse from fastapi.staticfiles import StaticFiles from fastapi.templating import Jinja2Templates @@ -40,8 +46,10 @@ import logging.handlers log = logging.getLogger("jarvischat") log.setLevel(logging.DEBUG) -syslog_handler = logging.handlers.SysLogHandler(address='/dev/log') -syslog_handler.setFormatter(logging.Formatter('jarvischat[%(process)d]: %(levelname)s %(message)s')) +syslog_handler = logging.handlers.SysLogHandler(address="/dev/log") +syslog_handler.setFormatter( + logging.Formatter("jarvischat[%(process)d]: %(levelname)s %(message)s") +) log.addHandler(syslog_handler) # --- Configuration --- @@ -52,6 +60,18 @@ BASE_DIR = Path(__file__).parent DB_PATH = BASE_DIR / "jarvischat.db" DEFAULT_MODEL = "llama3.1:latest" +# --- Auth / Session Configuration --- +# Session timeout is intentionally short so tab close/crash leaves a brief exposure window. +SESSION_TIMEOUT_SECONDS = 90 +MAX_PIN_ATTEMPTS = 5 +PIN_LOCKOUT_SECONDS = 300 +ALLOW_DEFAULT_PIN = os.getenv("JARVISCHAT_ALLOW_DEFAULT_PIN", "false").lower() == "true" +TRUSTED_ORIGINS = { + origin.strip().rstrip("/") + for origin in os.getenv("JARVISCHAT_TRUSTED_ORIGINS", "").split(",") + if origin.strip() +} + # --- Templates and Static Files --- templates = Jinja2Templates(directory=str(BASE_DIR / "templates")) @@ -59,19 +79,24 @@ templates = Jinja2Templates(directory=str(BASE_DIR / "templates")) PERPLEXITY_THRESHOLD = 15.0 # --- Refusal Patterns --- -REFUSAL_PATTERNS = re.compile(r"|".join([ - r"i don'?t have (?:real-?time|current|live)", - r"i (?:can'?t|cannot) provide (?:current|real-?time|live)", - r"i don'?t have access to (?:current|real-?time|live)", - r"(?:current|live|real-?time) (?:data|information|prices?|weather)", - r"my (?:knowledge|training) (?:cutoff|only goes|ends)", - r"as of my (?:knowledge|training) cutoff", - r"i'?m not able to (?:access|provide|browse)", - r"(?:check|visit|use) a (?:website|financial|news)", - r"as an ai model", - r"based on my training data", - r"i don'?t have the capability", -]), re.IGNORECASE) +REFUSAL_PATTERNS = re.compile( + r"|".join( + [ + r"i don'?t have (?:real-?time|current|live)", + r"i (?:can'?t|cannot) provide (?:current|real-?time|live)", + r"i don'?t have access to (?:current|real-?time|live)", + r"(?:current|live|real-?time) (?:data|information|prices?|weather)", + r"my (?:knowledge|training) (?:cutoff|only goes|ends)", + r"as of my (?:knowledge|training) cutoff", + r"i'?m not able to (?:access|provide|browse)", + r"(?:check|visit|use) a (?:website|financial|news)", + r"as an ai model", + r"based on my training data", + r"i don'?t have the capability", + ] + ), + re.IGNORECASE, +) # --- Hedging patterns --- HEDGE_PATTERNS = [ @@ -82,6 +107,42 @@ HEDGE_PATTERNS = [ r"[Bb]ut\s+please\s+(?:make\s+sure|verify|check)[^.]*\.\s*", ] +SESSIONS: dict[str, dict] = {} +PIN_ATTEMPTS: dict[str, dict] = {} +SESSION_LOCK = Lock() + + +def hash_pin(pin: str, salt_hex: Optional[str] = None) -> tuple[str, str]: + """Hash a 4-digit PIN with PBKDF2-HMAC-SHA256.""" + salt = bytes.fromhex(salt_hex) if salt_hex else os.urandom(16) + digest = hashlib.pbkdf2_hmac("sha256", pin.encode("utf-8"), salt, 200_000) + return salt.hex(), digest.hex() + + +def audit_event( + event: str, + outcome: str, + *, + ip: str = "unknown", + role: str = "none", + details: str = "", + warning: bool = False, +) -> None: + # Structured audit entries make destructive/auth events searchable in journal/syslog. + payload = { + "event": event, + "outcome": outcome, + "ip": ip, + "role": role, + "details": details[:300], + } + msg = "AUDIT " + json.dumps(payload, separators=(",", ":")) + if warning: + log.warning(msg) + else: + log.info(msg) + + def clean_hedging(text: str) -> str: """Remove hedging sentences from model response.""" cleaned = text @@ -89,6 +150,7 @@ def clean_hedging(text: str) -> str: cleaned = re.sub(pattern, "", cleaned, flags=re.IGNORECASE) return cleaned.strip() + def format_direct_answer(question: str, results: list[dict]) -> str: """Format search results directly when model refuses to help.""" if not results: @@ -96,11 +158,12 @@ def format_direct_answer(question: str, results: list[dict]) -> str: lines = ["Here's what I found:\n"] for r in results[:3]: lines.append(f"**{r['title']}**") - if r['content']: + if r["content"]: lines.append(f"{r['content']}") lines.append("") return "\n".join(lines).strip() + # --- Default Profile --- DEFAULT_PROFILE = """You are a coding companion running locally on a machine called "jarvis". @@ -132,9 +195,18 @@ DEFAULT_PROFILE = """You are a coding companion running locally on a machine cal # --- Default System Prompt Presets --- DEFAULT_PRESETS = [ - {"name": "Coding Companion", "prompt": "You are a senior software engineer and coding companion. Focus on writing clean, efficient, well-documented code. Provide complete working examples. Explain architectural decisions and trade-offs. Prefer Rust, Python, and bash."}, - {"name": "Linux Sysadmin", "prompt": "You are an experienced Linux systems administrator. Focus on command-line solutions, systemd services, networking, storage, and security. Prefer Debian/Ubuntu conventions. Be concise and direct."}, - {"name": "General Assistant", "prompt": "You are a helpful general-purpose assistant. Be clear and concise."} + { + "name": "Coding Companion", + "prompt": "You are a senior software engineer and coding companion. Focus on writing clean, efficient, well-documented code. Provide complete working examples. Explain architectural decisions and trade-offs. Prefer Rust, Python, and bash.", + }, + { + "name": "Linux Sysadmin", + "prompt": "You are an experienced Linux systems administrator. Focus on command-line solutions, systemd services, networking, storage, and security. Prefer Debian/Ubuntu conventions. Be concise and direct.", + }, + { + "name": "General Assistant", + "prompt": "You are a helpful general-purpose assistant. Be clear and concise.", + }, ] @@ -142,6 +214,7 @@ DEFAULT_PRESETS = [ # DATABASE # ============================================================================= + def init_db(): conn = sqlite3.connect(DB_PATH) conn.row_factory = sqlite3.Row @@ -202,28 +275,79 @@ def init_db(): existing = conn.execute("SELECT id FROM profile WHERE id = 1").fetchone() if not existing: now = datetime.now(timezone.utc).isoformat() - conn.execute("INSERT INTO profile (id, content, updated_at) VALUES (1, ?, ?)", (DEFAULT_PROFILE, now)) + conn.execute( + "INSERT INTO profile (id, content, updated_at) VALUES (1, ?, ?)", + (DEFAULT_PROFILE, now), + ) # Seed default presets if empty - existing_presets = conn.execute("SELECT COUNT(*) as c FROM system_presets").fetchone() + existing_presets = conn.execute( + "SELECT COUNT(*) as c FROM system_presets" + ).fetchone() if existing_presets["c"] == 0: now = datetime.now(timezone.utc).isoformat() for preset in DEFAULT_PRESETS: conn.execute( "INSERT INTO system_presets (id, name, prompt, is_default, created_at) VALUES (?, ?, ?, 1, ?)", - (str(uuid.uuid4()), preset["name"], preset["prompt"], now) + (str(uuid.uuid4()), preset["name"], preset["prompt"], now), ) # Default settings - defaults = {"profile_enabled": "true", "default_model": DEFAULT_MODEL, "search_enabled": "true", "memory_enabled": "true"} + defaults = { + "profile_enabled": "true", + "default_model": DEFAULT_MODEL, + "search_enabled": "true", + "memory_enabled": "true", + } for key, value in defaults.items(): - existing = conn.execute("SELECT key FROM settings WHERE key = ?", (key,)).fetchone() + existing = conn.execute( + "SELECT key FROM settings WHERE key = ?", (key,) + ).fetchone() if not existing: - conn.execute("INSERT INTO settings (key, value) VALUES (?, ?)", (key, value)) + conn.execute( + "INSERT INTO settings (key, value) VALUES (?, ?)", (key, value) + ) + + # Seed admin PIN hash if missing. + existing_pin_hash = conn.execute( + "SELECT value FROM settings WHERE key = 'admin_pin_hash'" + ).fetchone() + existing_pin_salt = conn.execute( + "SELECT value FROM settings WHERE key = 'admin_pin_salt'" + ).fetchone() + if not existing_pin_hash or not existing_pin_salt: + # First-boot policy: require explicit PIN unless operator explicitly opts into insecure fallback. + configured_pin = os.getenv("JARVISCHAT_ADMIN_PIN", "").strip() + if re.fullmatch(r"\d{4}", configured_pin): + seed_pin = configured_pin + pin_source = "env" + elif ALLOW_DEFAULT_PIN: + seed_pin = "1234" + pin_source = "default" + else: + raise RuntimeError( + "Admin PIN bootstrap blocked: set JARVISCHAT_ADMIN_PIN to a 4-digit PIN " + "or set JARVISCHAT_ALLOW_DEFAULT_PIN=true to allow insecure default PIN 1234." + ) + + salt_hex, pin_hash_hex = hash_pin(seed_pin) + conn.execute( + "INSERT OR REPLACE INTO settings (key, value) VALUES (?, ?)", + ("admin_pin_hash", pin_hash_hex), + ) + conn.execute( + "INSERT OR REPLACE INTO settings (key, value) VALUES (?, ?)", + ("admin_pin_salt", salt_hex), + ) + if pin_source == "default": + log.warning("Admin PIN seeded from insecure default 1234 (override enabled).") + else: + log.info("Admin PIN hash seeded from configured environment PIN.") conn.commit() conn.close() + def get_db(): conn = sqlite3.connect(DB_PATH) conn.row_factory = sqlite3.Row @@ -231,17 +355,25 @@ def get_db(): return conn +def get_setting(db, key: str, default: str = "") -> str: + row = db.execute("SELECT value FROM settings WHERE key = ?", (key,)).fetchone() + return row["value"] if row else default + + # ============================================================================= # MEMORY SYSTEM (FTS5) # ============================================================================= -def add_memory(fact: str, topic: str = "general", source: str = "explicit") -> int | None: + +def add_memory( + fact: str, topic: str = "general", source: str = "explicit" +) -> int | None: """Store a new memory. Returns rowid.""" db = get_db() now = datetime.now(timezone.utc).isoformat() cur = db.execute( "INSERT INTO memories (fact, topic, source, created_at) VALUES (?, ?, ?, ?)", - (fact, topic, source, now) + (fact, topic, source, now), ) db.commit() rowid = cur.lastrowid @@ -249,6 +381,7 @@ def add_memory(fact: str, topic: str = "general", source: str = "explicit") -> i log.info(f"Memory added [{topic}]: {fact[:50]}...") return rowid + def search_memories(query: str, limit: int = 5) -> list[dict]: """Search memories by relevance using FTS5.""" if not query.strip(): @@ -260,10 +393,13 @@ def search_memories(query: str, limit: int = 5) -> list[dict]: return [] safe_query = " OR ".join(word + "*" for word in words[:10]) try: - rows = db.execute(""" + rows = db.execute( + """ SELECT rowid, fact, topic, source, created_at, bm25(memories) AS rank FROM memories WHERE memories MATCH ? ORDER BY rank LIMIT ? - """, (safe_query, limit)).fetchall() + """, + (safe_query, limit), + ).fetchall() results = [dict(row) for row in rows] log.debug(f"Memory search '{query}' returned {len(results)} results") except Exception as e: @@ -272,16 +408,23 @@ def search_memories(query: str, limit: int = 5) -> list[dict]: db.close() return results + def get_all_memories(topic: Optional[str] = None) -> list[dict]: """Get all memories, optionally filtered by topic.""" db = get_db() if topic: - rows = db.execute("SELECT rowid, * FROM memories WHERE topic = ? ORDER BY created_at DESC", (topic,)).fetchall() + rows = db.execute( + "SELECT rowid, * FROM memories WHERE topic = ? ORDER BY created_at DESC", + (topic,), + ).fetchall() else: - rows = db.execute("SELECT rowid, * FROM memories ORDER BY created_at DESC").fetchall() + rows = db.execute( + "SELECT rowid, * FROM memories ORDER BY created_at DESC" + ).fetchall() db.close() return [dict(row) for row in rows] + def delete_memory(rowid: int) -> bool: """Delete a memory by rowid.""" db = get_db() @@ -293,6 +436,7 @@ def delete_memory(rowid: int) -> bool: log.info(f"Memory deleted: rowid={rowid}") return deleted + def update_memory(rowid: int, fact: str) -> bool: """Update an existing memory's fact.""" db = get_db() @@ -302,6 +446,7 @@ def update_memory(rowid: int, fact: str) -> bool: db.close() return updated + def get_memory_count() -> int: """Get total number of memories.""" db = get_db() @@ -325,25 +470,47 @@ FORGET_PATTERNS = [ r"remove (?:the )?memory (?:about |that )?(.+)", ] + def detect_topic(fact: str) -> str: """Auto-detect memory topic from content.""" fact_lower = fact.lower() - if any(w in fact_lower for w in ["prefer", "like", "hate", "always", "never", "favorite"]): + if any( + w in fact_lower + for w in ["prefer", "like", "hate", "always", "never", "favorite"] + ): return "preference" - elif any(w in fact_lower for w in ["working on", "building", "project", "developing"]): + elif any( + w in fact_lower for w in ["working on", "building", "project", "developing"] + ): return "project" - elif any(w in fact_lower for w in ["run", "install", "server", "ip", "port", "service", "docker", "systemd"]): + elif any( + w in fact_lower + for w in [ + "run", + "install", + "server", + "ip", + "port", + "service", + "docker", + "systemd", + ] + ): return "infrastructure" - elif any(w in fact_lower for w in ["my name", "i am", "i'm a", "i live", "my wife", "my partner"]): + elif any( + w in fact_lower + for w in ["my name", "i am", "i'm a", "i live", "my wife", "my partner"] + ): return "personal" return "general" + def process_remember_command(user_message: str) -> Optional[str]: """Check for 'remember/forget' commands. Returns confirmation or None.""" for pattern, source in REMEMBER_PATTERNS: match = re.search(pattern, user_message, re.IGNORECASE) if match: - fact = match.group(1).strip().rstrip('.') + fact = match.group(1).strip().rstrip(".") topic = detect_topic(fact) add_memory(fact, topic=topic, source=source) return f"✓ Remembered [{topic}]: {fact}" @@ -351,7 +518,7 @@ def process_remember_command(user_message: str) -> Optional[str]: for pattern in FORGET_PATTERNS: match = re.search(pattern, user_message, re.IGNORECASE) if match: - search_term = match.group(1).strip().rstrip('.') + search_term = match.group(1).strip().rstrip(".") memories = search_memories(search_term, limit=3) if memories: for m in memories: @@ -366,48 +533,103 @@ def process_remember_command(user_message: str) -> Optional[str]: # SEARXNG INTEGRATION # ============================================================================= + async def query_searxng(query: str, max_results: int = 5) -> list[dict]: """Query SearXNG and return search results.""" log.info(f"Querying SearXNG: '{query}'") async with httpx.AsyncClient() as client: # Weather shortcut - weather_match = re.search(r"(?:weather|temperature|forecast)\s+(?:in\s+)?(.+?)(?:\s+right now|\s+today|\s+degrees)?$", query, re.IGNORECASE) - if weather_match or "weather" in query.lower() or "temperature" in query.lower(): - location = weather_match.group(1) if weather_match else re.sub(r"(weather|temperature|forecast|right now|today|degrees)", "", query, flags=re.IGNORECASE).strip() + weather_match = re.search( + r"(?:weather|temperature|forecast)\s+(?:in\s+)?(.+?)(?:\s+right now|\s+today|\s+degrees)?$", + query, + re.IGNORECASE, + ) + if ( + weather_match + or "weather" in query.lower() + or "temperature" in query.lower() + ): + location = ( + weather_match.group(1) + if weather_match + else re.sub( + r"(weather|temperature|forecast|right now|today|degrees)", + "", + query, + flags=re.IGNORECASE, + ).strip() + ) if location: try: - resp = await client.get(f"https://wttr.in/{location}?format=3", timeout=10.0, headers={"User-Agent": "curl/7.68.0"}) + resp = await client.get( + f"https://wttr.in/{location}?format=3", + timeout=10.0, + headers={"User-Agent": "curl/7.68.0"}, + ) if resp.status_code == 200: - return [{"title": "Current Weather", "url": f"https://wttr.in/{location}", "content": resp.text.strip()}] + return [ + { + "title": "Current Weather", + "url": f"https://wttr.in/{location}", + "content": resp.text.strip(), + } + ] except Exception as e: log.warning(f"wttr.in error: {e}") try: - resp = await client.get(f"{SEARXNG_BASE}/search", params={"q": query, "format": "json", "categories": "general"}, timeout=10.0) + resp = await client.get( + f"{SEARXNG_BASE}/search", + params={"q": query, "format": "json", "categories": "general"}, + timeout=10.0, + ) if resp.status_code == 200: data = resp.json() results = [] for answer in data.get("answers", []): - results.append({"title": "Direct Answer", "url": "", "content": answer}) + results.append( + {"title": "Direct Answer", "url": "", "content": answer} + ) for box in data.get("infoboxes", []): content = box.get("content", "") if not content and box.get("attributes"): - content = " | ".join([f"{a.get('label','')}: {a.get('value','')}" for a in box["attributes"]]) - results.append({"title": box.get("infobox", "Info"), "url": box.get("urls", [{}])[0].get("url", "") if box.get("urls") else "", "content": content}) + content = " | ".join( + [ + f"{a.get('label', '')}: {a.get('value', '')}" + for a in box["attributes"] + ] + ) + results.append( + { + "title": box.get("infobox", "Info"), + "url": box.get("urls", [{}])[0].get("url", "") + if box.get("urls") + else "", + "content": content, + } + ) for r in data.get("results", [])[:max_results]: - results.append({"title": r.get("title", ""), "url": r.get("url", ""), "content": r.get("content", "")}) + results.append( + { + "title": r.get("title", ""), + "url": r.get("url", ""), + "content": r.get("content", ""), + } + ) log.info(f"SearXNG returned {len(results)} results") return results except Exception as e: log.error(f"SearXNG error: {e}") return [] + def calculate_perplexity(logprobs: list) -> float: if not logprobs: return 0.0 avg_logprob = sum(lp["logprob"] for lp in logprobs) / len(logprobs) return math.exp(-avg_logprob) + def is_uncertain(logprobs: list, threshold: float = PERPLEXITY_THRESHOLD) -> bool: if not logprobs: return False @@ -415,6 +637,7 @@ def is_uncertain(logprobs: list, threshold: float = PERPLEXITY_THRESHOLD) -> boo log.info(f"Perplexity: {perplexity:.2f} (threshold: {threshold})") return perplexity > threshold + def is_refusal(text: str) -> bool: match = REFUSAL_PATTERNS.search(text) if match: @@ -422,25 +645,42 @@ def is_refusal(text: str) -> bool: return True return False + def format_search_results(results: list[dict]) -> str: if not results: return "" lines = ["[LIVE WEB DATA]\n"] for i, r in enumerate(results, 1): lines.append(f"{i}. {r['title']}") - if r['content']: + if r["content"]: lines.append(f" {r['content']}") lines.append("") - lines.append("\nAnswer directly using the data above. No apologies. No disclaimers. Just answer.") + lines.append( + "\nAnswer directly using the data above. No apologies. No disclaimers. Just answer." + ) return "\n".join(lines) + def extract_search_query(user_message: str) -> str: query = user_message.strip() if re.search(r"temperature|weather", query, re.IGNORECASE): - query = re.sub(r"^what('?s| is) the ", "", query, flags=re.IGNORECASE) + " right now degrees" + query = ( + re.sub(r"^what('?s| is) the ", "", query, flags=re.IGNORECASE) + + " right now degrees" + ) if re.search(r"price|spot price", query, re.IGNORECASE): - query = re.sub(r"^(what('?s| is)|can you tell me) the ", "", query, flags=re.IGNORECASE) + " today USD" - query = re.sub(r"^(what|who|where|when|why|how|is|are|can|could|would|should|do|does|did)\s+", "", query, flags=re.IGNORECASE) + query = ( + re.sub( + r"^(what('?s| is)|can you tell me) the ", "", query, flags=re.IGNORECASE + ) + + " today USD" + ) + query = re.sub( + r"^(what|who|where|when|why|how|is|are|can|could|would|should|do|does|did)\s+", + "", + query, + flags=re.IGNORECASE, + ) query = re.sub(r"[?!.]+$", "", query) return query[:100].strip() or user_message[:100] @@ -449,10 +689,16 @@ def extract_search_query(user_message: str) -> str: # GPU STATS # ============================================================================= + def get_gpu_stats() -> dict: """Get AMD GPU stats via rocm-smi.""" try: - result = subprocess.run(["rocm-smi", "--showuse", "--showmemuse", "--json"], capture_output=True, text=True, timeout=5) + result = subprocess.run( + ["rocm-smi", "--showuse", "--showmemuse", "--json"], + capture_output=True, + text=True, + timeout=5, + ) if result.returncode == 0: data = json.loads(result.stdout) gpu_info = data.get("card0", {}) @@ -474,6 +720,7 @@ def get_gpu_stats() -> dict: # APP LIFECYCLE # ============================================================================= + @asynccontextmanager async def lifespan(app: FastAPI): log.info(f"JarvisChat v{VERSION} starting up") @@ -483,6 +730,7 @@ async def lifespan(app: FastAPI): yield log.info("JarvisChat shutting down") + app = FastAPI(title="JarvisChat", lifespan=lifespan) # Mount static files @@ -491,14 +739,356 @@ if static_dir.exists(): app.mount("/static", StaticFiles(directory=str(static_dir)), name="static") +# ============================================================================= +# AUTH + SESSION +# ============================================================================= + + +def get_client_ip(request: Request) -> str: + forwarded = request.headers.get("x-forwarded-for", "").strip() + if forwarded: + return forwarded.split(",")[0].strip() + if request.client and request.client.host: + return request.client.host + return "unknown" + + +def cleanup_sessions(now_ts: Optional[float] = None) -> None: + now_ts = now_ts or time.time() + with SESSION_LOCK: + expired = [ + sid + for sid, meta in SESSIONS.items() + if (now_ts - meta.get("last_seen", 0)) > SESSION_TIMEOUT_SECONDS + ] + for sid in expired: + del SESSIONS[sid] + + +def verify_admin_pin(pin: str) -> bool: + if not re.fullmatch(r"\d{4}", pin or ""): + return False + db = get_db() + pin_hash = get_setting(db, "admin_pin_hash", "") + pin_salt = get_setting(db, "admin_pin_salt", "") + db.close() + if not pin_hash or not pin_salt: + return False + _, candidate_hash = hash_pin(pin, salt_hex=pin_salt) + return hmac.compare_digest(candidate_hash, pin_hash) + + +def is_ip_locked(ip: str) -> tuple[bool, int]: + now_ts = time.time() + with SESSION_LOCK: + state = PIN_ATTEMPTS.get(ip) + if not state: + return False, 0 + locked_until = state.get("locked_until", 0) + if locked_until > now_ts: + return True, int(locked_until - now_ts) + if locked_until: + PIN_ATTEMPTS.pop(ip, None) + return False, 0 + + +def record_pin_failure(ip: str) -> None: + now_ts = time.time() + with SESSION_LOCK: + state = PIN_ATTEMPTS.get(ip, {"fail_count": 0, "locked_until": 0}) + state["fail_count"] = int(state.get("fail_count", 0)) + 1 + if state["fail_count"] >= MAX_PIN_ATTEMPTS: + state["locked_until"] = now_ts + PIN_LOCKOUT_SECONDS + state["fail_count"] = 0 + PIN_ATTEMPTS[ip] = state + + +def clear_pin_failures(ip: str) -> None: + with SESSION_LOCK: + PIN_ATTEMPTS.pop(ip, None) + + +def create_session(ip: str, role: str) -> str: + now_ts = time.time() + sid = uuid.uuid4().hex + with SESSION_LOCK: + SESSIONS[sid] = { + "ip": ip, + "role": role, + "created_at": now_ts, + "last_seen": now_ts, + } + return sid + + +def validate_session(sid: str, ip: str, touch: bool = True) -> bool: + if not sid: + return False + now_ts = time.time() + cleanup_sessions(now_ts) + with SESSION_LOCK: + session = SESSIONS.get(sid) + if not session: + return False + if session.get("ip") != ip: + return False + if touch: + session["last_seen"] = now_ts + return True + + +def get_session(sid: str, ip: str, touch: bool = True) -> Optional[dict]: + if not sid: + return None + now_ts = time.time() + cleanup_sessions(now_ts) + with SESSION_LOCK: + session = SESSIONS.get(sid) + if not session: + return None + if session.get("ip") != ip: + return None + if touch: + session["last_seen"] = now_ts + return dict(session) + + +def revoke_session(sid: str) -> None: + if not sid: + return + with SESSION_LOCK: + SESSIONS.pop(sid, None) + + +def is_admin_only(path: str, method: str) -> bool: + # Capability split: guest may chat/search; write/destructive/admin config paths require PIN-unlocked admin. + if method in {"PUT", "DELETE", "PATCH"}: + return True + if method != "POST": + return False + guest_allowed_posts = { + "/api/chat", + "/api/search", + "/api/show", + "/api/auth/login", + "/api/auth/logout", + "/api/auth/session", + "/api/auth/heartbeat", + "/api/auth/guest", + } + return path not in guest_allowed_posts + + +def is_state_changing(method: str) -> bool: + return method in {"POST", "PUT", "DELETE", "PATCH"} + + +def origin_allowed(request: Request) -> bool: + """Allow same-origin browser writes and optional configured trusted origins. + + If Origin/Referer is absent, treat as non-browser/API client and allow + (token/session header remains the primary auth factor). + """ + host = request.headers.get("host", "").strip() + expected_origin = f"{request.url.scheme}://{host}".rstrip("/") if host else "" + origin = request.headers.get("origin", "").strip().rstrip("/") + referer = request.headers.get("referer", "").strip() + + if origin: + if origin == expected_origin or origin in TRUSTED_ORIGINS: + return True + return False + + if referer: + parsed = urlparse(referer) + ref_origin = f"{parsed.scheme}://{parsed.netloc}".rstrip("/") + if ref_origin == expected_origin or ref_origin in TRUSTED_ORIGINS: + return True + return False + + return True + + +@app.middleware("http") +async def session_auth_middleware(request: Request, call_next): + path = request.url.path + ip = get_client_ip(request) + request.state.session_role = "none" + request.state.client_ip = ip + + unauth_paths = { + "/api/auth/login", + "/api/auth/logout", + "/api/auth/session", + "/api/auth/heartbeat", + "/api/auth/guest", + } + + # CSRF hardening for browser writes: same-origin or explicitly allowlisted origins only. + if path.startswith("/api/") and is_state_changing(request.method): + if not origin_allowed(request): + audit_event( + "origin_check", + "denied", + ip=ip, + role="none", + details=f"{request.method} {path}", + warning=True, + ) + return JSONResponse( + status_code=403, + content={"detail": "Origin check failed"}, + ) + + if path.startswith("/api/") and path not in unauth_paths: + sid = request.headers.get("x-session-id", "").strip() + session = get_session(sid, ip, touch=True) + if not session: + audit_event( + "auth_required", + "denied", + ip=ip, + role="none", + details=f"{request.method} {path}", + warning=True, + ) + return JSONResponse( + status_code=401, + content={"detail": "Authentication required"}, + ) + request.state.session_role = session.get("role", "none") + # Guest sessions stay usable for chat, but advanced/destructive actions require admin capability. + if session.get("role") != "admin" and is_admin_only(path, request.method): + audit_event( + "admin_capability", + "denied", + ip=ip, + role=session.get("role", "none"), + details=f"{request.method} {path}", + warning=True, + ) + return JSONResponse( + status_code=403, + content={"detail": "Admin PIN required for this action"}, + ) + + response = await call_next(request) + # Emit success audit only after route executes, so logs reflect completed admin actions. + if path.startswith("/api/") and is_admin_only(path, request.method): + role = getattr(request.state, "session_role", "none") + if response.status_code < 400 and role == "admin": + audit_event( + "admin_action", + "success", + ip=ip, + role=role, + details=f"{request.method} {path}", + ) + return response + + +@app.post("/api/auth/guest") +async def auth_guest(request: Request): + ip = get_client_ip(request) + sid = create_session(ip, role="guest") + audit_event("guest_session", "success", ip=ip, role="guest") + return { + "status": "ok", + "session_id": sid, + "role": "guest", + "timeout_seconds": SESSION_TIMEOUT_SECONDS, + } + + +@app.post("/api/auth/login") +async def auth_login(request: Request): + body = await request.json() + pin = str(body.get("pin", "")) + ip = get_client_ip(request) + + locked, retry_after = is_ip_locked(ip) + if locked: + audit_event( + "admin_login", + "locked", + ip=ip, + role="none", + details=f"retry_after={retry_after}", + warning=True, + ) + raise HTTPException( + status_code=429, + detail=f"Too many failed PIN attempts. Retry in {retry_after}s.", + ) + + if not verify_admin_pin(pin): + record_pin_failure(ip) + audit_event("admin_login", "failed", ip=ip, role="none", warning=True) + raise HTTPException(status_code=401, detail="Invalid PIN") + + clear_pin_failures(ip) + sid = create_session(ip, role="admin") + audit_event("admin_login", "success", ip=ip, role="admin") + return { + "status": "ok", + "session_id": sid, + "role": "admin", + "timeout_seconds": SESSION_TIMEOUT_SECONDS, + } + + +@app.get("/api/auth/session") +async def auth_session(request: Request): + sid = request.headers.get("x-session-id", "").strip() + ip = get_client_ip(request) + session = get_session(sid, ip, touch=True) + return { + "authenticated": bool(session), + "role": session.get("role") if session else "none", + } + + +@app.post("/api/auth/heartbeat") +async def auth_heartbeat(request: Request): + sid = request.headers.get("x-session-id", "").strip() + ip = get_client_ip(request) + if not sid or not validate_session(sid, ip, touch=True): + raise HTTPException(status_code=401, detail="Authentication required") + return {"status": "ok"} + + +@app.post("/api/auth/logout") +async def auth_logout(request: Request): + ip = get_client_ip(request) + sid = request.headers.get("x-session-id", "").strip() + role = "none" + if sid: + session = get_session(sid, ip, touch=False) + role = session.get("role", "none") if session else "none" + if not sid: + try: + body = await request.json() + sid = str(body.get("session_id", "")).strip() + except Exception: + try: + sid = (await request.body()).decode("utf-8", errors="ignore").strip() + except Exception: + sid = "" + revoke_session(sid) + audit_event("logout", "success", ip=ip, role=role) + return {"status": "ok"} + + # ============================================================================= # API ROUTES # ============================================================================= + @app.get("/", response_class=HTMLResponse) async def index(request: Request): return templates.TemplateResponse(request, "index.html", {"version": VERSION}) + @app.get("/api/models") async def list_models(): async with httpx.AsyncClient() as client: @@ -508,6 +1098,7 @@ async def list_models(): except httpx.ConnectError: raise HTTPException(status_code=502, detail="Cannot connect to Ollama.") + @app.get("/api/ps") async def running_models(): async with httpx.AsyncClient() as client: @@ -517,6 +1108,7 @@ async def running_models(): except httpx.ConnectError: raise HTTPException(status_code=502, detail="Cannot connect to Ollama.") + @app.post("/api/show") async def show_model(request: Request): body = await request.json() @@ -527,15 +1119,21 @@ async def show_model(request: Request): except httpx.ConnectError: raise HTTPException(status_code=502, detail="Cannot connect to Ollama.") + @app.get("/api/search/status") async def search_status(): async with httpx.AsyncClient() as client: try: - resp = await client.get(f"{SEARXNG_BASE}/search", params={"q": "test", "format": "json"}, timeout=5) + resp = await client.get( + f"{SEARXNG_BASE}/search", + params={"q": "test", "format": "json"}, + timeout=5, + ) return {"available": resp.status_code == 200} except Exception: return {"available": False} + @app.get("/api/stats") async def system_stats(): cpu_percent = psutil.cpu_percent(interval=0.1) @@ -554,23 +1152,31 @@ async def system_stats(): # --- Memory API --- + @app.get("/api/memories") async def list_memories(topic: Optional[str] = None): memories = get_all_memories(topic) return {"memories": memories, "count": len(memories)} + @app.post("/api/memories") async def create_memory(request: Request): body = await request.json() - rowid = add_memory(fact=body["fact"], topic=body.get("topic", "general"), source=body.get("source", "manual")) + rowid = add_memory( + fact=body["fact"], + topic=body.get("topic", "general"), + source=body.get("source", "manual"), + ) return {"rowid": rowid, "status": "ok"} + @app.delete("/api/memories/{rowid}") async def remove_memory(rowid: int): if not delete_memory(rowid): raise HTTPException(status_code=404, detail="Memory not found") return {"status": "ok"} + @app.put("/api/memories/{rowid}") async def edit_memory(rowid: int, request: Request): body = await request.json() @@ -578,39 +1184,53 @@ async def edit_memory(rowid: int, request: Request): raise HTTPException(status_code=404, detail="Memory not found") return {"status": "ok"} + @app.get("/api/memories/search") async def search_memories_api(q: str, limit: int = 10): results = search_memories(q, limit=limit) return {"results": results, "count": len(results)} + @app.get("/api/memories/stats") async def memory_stats(): db = get_db() total = db.execute("SELECT COUNT(*) as c FROM memories").fetchone()["c"] - topics = db.execute("SELECT topic, COUNT(*) as c FROM memories GROUP BY topic ORDER BY c DESC").fetchall() + topics = db.execute( + "SELECT topic, COUNT(*) as c FROM memories GROUP BY topic ORDER BY c DESC" + ).fetchall() db.close() return {"total": total, "by_topic": {row["topic"]: row["c"] for row in topics}} # --- Profile --- + @app.get("/api/profile") async def get_profile(): db = get_db() row = db.execute("SELECT content, updated_at FROM profile WHERE id = 1").fetchone() db.close() - return {"content": row["content"], "updated_at": row["updated_at"]} if row else {"content": "", "updated_at": ""} + return ( + {"content": row["content"], "updated_at": row["updated_at"]} + if row + else {"content": "", "updated_at": ""} + ) + @app.put("/api/profile") async def update_profile(request: Request): body = await request.json() now = datetime.now(timezone.utc).isoformat() db = get_db() - db.execute("UPDATE profile SET content = ?, updated_at = ? WHERE id = 1", (body["content"], now)) + db.execute( + "UPDATE profile SET content = ?, updated_at = ? WHERE id = 1", + (body["content"], now), + ) db.commit() db.close() return {"status": "ok", "updated_at": now} + @app.get("/api/profile/default") async def get_default_profile(): return {"content": DEFAULT_PROFILE} @@ -618,6 +1238,7 @@ async def get_default_profile(): # --- Settings --- + @app.get("/api/settings") async def get_settings(): db = get_db() @@ -625,12 +1246,16 @@ async def get_settings(): db.close() return {row["key"]: row["value"] for row in rows} + @app.put("/api/settings") async def update_settings(request: Request): body = await request.json() db = get_db() for key, value in body.items(): - db.execute("INSERT OR REPLACE INTO settings (key, value) VALUES (?, ?)", (key, str(value))) + db.execute( + "INSERT OR REPLACE INTO settings (key, value) VALUES (?, ?)", + (key, str(value)), + ) db.commit() db.close() return {"status": "ok"} @@ -638,38 +1263,51 @@ async def update_settings(request: Request): # --- System Presets --- + @app.get("/api/presets") async def list_presets(): db = get_db() - rows = db.execute("SELECT * FROM system_presets ORDER BY is_default DESC, name ASC").fetchall() + rows = db.execute( + "SELECT * FROM system_presets ORDER BY is_default DESC, name ASC" + ).fetchall() db.close() return [dict(r) for r in rows] + @app.post("/api/presets") async def create_preset(request: Request): body = await request.json() preset_id = str(uuid.uuid4()) now = datetime.now(timezone.utc).isoformat() db = get_db() - db.execute("INSERT INTO system_presets (id, name, prompt, is_default, created_at) VALUES (?, ?, ?, 0, ?)", - (preset_id, body["name"], body["prompt"], now)) + db.execute( + "INSERT INTO system_presets (id, name, prompt, is_default, created_at) VALUES (?, ?, ?, 0, ?)", + (preset_id, body["name"], body["prompt"], now), + ) db.commit() db.close() return {"id": preset_id, "name": body["name"], "prompt": body["prompt"]} + @app.put("/api/presets/{preset_id}") async def update_preset(preset_id: str, request: Request): body = await request.json() db = get_db() - db.execute("UPDATE system_presets SET name = ?, prompt = ? WHERE id = ?", (body["name"], body["prompt"], preset_id)) + db.execute( + "UPDATE system_presets SET name = ?, prompt = ? WHERE id = ?", + (body["name"], body["prompt"], preset_id), + ) db.commit() db.close() return {"status": "ok"} + @app.delete("/api/presets/{preset_id}") async def delete_preset(preset_id: str): db = get_db() - db.execute("DELETE FROM system_presets WHERE id = ? AND is_default = 0", (preset_id,)) + db.execute( + "DELETE FROM system_presets WHERE id = ? AND is_default = 0", (preset_id,) + ) db.commit() db.close() return {"status": "ok"} @@ -677,6 +1315,7 @@ async def delete_preset(preset_id: str): # --- Conversations --- + @app.get("/api/conversations") async def list_conversations(): db = get_db() @@ -684,6 +1323,7 @@ async def list_conversations(): db.close() return [dict(r) for r in rows] + @app.post("/api/conversations") async def create_conversation(request: Request): body = await request.json() @@ -692,11 +1332,20 @@ async def create_conversation(request: Request): model = body.get("model", DEFAULT_MODEL) title = body.get("title", "New Chat") db = get_db() - db.execute("INSERT INTO conversations (id, title, model, created_at, updated_at) VALUES (?, ?, ?, ?, ?)", - (conv_id, title, model, now, now)) + db.execute( + "INSERT INTO conversations (id, title, model, created_at, updated_at) VALUES (?, ?, ?, ?, ?)", + (conv_id, title, model, now, now), + ) db.commit() db.close() - return {"id": conv_id, "title": title, "model": model, "created_at": now, "updated_at": now} + return { + "id": conv_id, + "title": title, + "model": model, + "created_at": now, + "updated_at": now, + } + @app.get("/api/conversations/{conv_id}") async def get_conversation(conv_id: str): @@ -705,23 +1354,33 @@ async def get_conversation(conv_id: str): if not conv: db.close() raise HTTPException(status_code=404, detail="Conversation not found") - messages = db.execute("SELECT * FROM messages WHERE conversation_id = ? ORDER BY id ASC", (conv_id,)).fetchall() + messages = db.execute( + "SELECT * FROM messages WHERE conversation_id = ? ORDER BY id ASC", (conv_id,) + ).fetchall() db.close() return {"conversation": dict(conv), "messages": [dict(m) for m in messages]} + @app.put("/api/conversations/{conv_id}") async def update_conversation(conv_id: str, request: Request): body = await request.json() db = get_db() now = datetime.now(timezone.utc).isoformat() if "title" in body: - db.execute("UPDATE conversations SET title = ?, updated_at = ? WHERE id = ?", (body["title"], now, conv_id)) + db.execute( + "UPDATE conversations SET title = ?, updated_at = ? WHERE id = ?", + (body["title"], now, conv_id), + ) if "model" in body: - db.execute("UPDATE conversations SET model = ?, updated_at = ? WHERE id = ?", (body["model"], now, conv_id)) + db.execute( + "UPDATE conversations SET model = ?, updated_at = ? WHERE id = ?", + (body["model"], now, conv_id), + ) db.commit() db.close() return {"status": "ok"} + @app.delete("/api/conversations/{conv_id}") async def delete_conversation(conv_id: str): db = get_db() @@ -731,6 +1390,7 @@ async def delete_conversation(conv_id: str): db.close() return {"status": "ok"} + @app.delete("/api/conversations") async def delete_all_conversations(): db = get_db() @@ -746,6 +1406,7 @@ async def delete_all_conversations(): # EXPLICIT WEB SEARCH # ============================================================================= + @app.post("/api/search") async def explicit_search(request: Request): """Explicit web search - bypasses model uncertainty, queries SearXNG directly.""" @@ -763,13 +1424,19 @@ async def explicit_search(request: Request): if not conv_id: conv_id = str(uuid.uuid4()) title = f"🔍 {query[:70]}..." if len(query) > 70 else f"🔍 {query}" - db.execute("INSERT INTO conversations (id, title, model, created_at, updated_at) VALUES (?, ?, ?, ?, ?)", - (conv_id, title, model, now, now)) + db.execute( + "INSERT INTO conversations (id, title, model, created_at, updated_at) VALUES (?, ?, ?, ?, ?)", + (conv_id, title, model, now, now), + ) else: - db.execute("UPDATE conversations SET updated_at = ? WHERE id = ?", (now, conv_id)) + db.execute( + "UPDATE conversations SET updated_at = ? WHERE id = ?", (now, conv_id) + ) - db.execute("INSERT INTO messages (conversation_id, role, content, created_at) VALUES (?, ?, ?, ?)", - (conv_id, "user", f"🔍 {query}", now)) + db.execute( + "INSERT INTO messages (conversation_id, role, content, created_at) VALUES (?, ?, ?, ?)", + (conv_id, "user", f"🔍 {query}", now), + ) db.commit() db.close() @@ -781,14 +1448,21 @@ async def explicit_search(request: Request): if not results: error_msg = "No search results found." yield f"data: {json.dumps({'token': error_msg, 'conversation_id': conv_id})}\n\n" - + # Save to DB db2 = get_db() - db2.execute("INSERT INTO messages (conversation_id, role, content, created_at) VALUES (?, ?, ?, ?)", - (conv_id, "assistant", error_msg, datetime.now(timezone.utc).isoformat())) + db2.execute( + "INSERT INTO messages (conversation_id, role, content, created_at) VALUES (?, ?, ?, ?)", + ( + conv_id, + "assistant", + error_msg, + datetime.now(timezone.utc).isoformat(), + ), + ) db2.commit() db2.close() - + yield f"data: {json.dumps({'done': True, 'conversation_id': conv_id})}\n\n" return @@ -797,16 +1471,22 @@ async def explicit_search(request: Request): # Ask Ollama to summarize search_context = format_search_results(results) messages = [ - {"role": "system", "content": f"You have access to current web data. Answer directly using ONLY the data below. Be concise. No apologies. No disclaimers.\n\n{search_context}"}, - {"role": "user", "content": query} + { + "role": "system", + "content": f"You have access to current web data. Answer directly using ONLY the data below. Be concise. No apologies. No disclaimers.\n\n{search_context}", + }, + {"role": "user", "content": query}, ] full_response = [] async with httpx.AsyncClient() as client: try: - async with client.stream("POST", f"{OLLAMA_BASE}/api/chat", - json={"model": model, "messages": messages, "stream": True}, - timeout=httpx.Timeout(300.0, connect=10.0)) as resp: + async with client.stream( + "POST", + f"{OLLAMA_BASE}/api/chat", + json={"model": model, "messages": messages, "stream": True}, + timeout=httpx.Timeout(300.0, connect=10.0), + ) as resp: async for line in resp.aiter_lines(): if line.strip(): try: @@ -825,12 +1505,14 @@ async def explicit_search(request: Request): return summary = "".join(full_response) - + saved_msg = f"{summary}\n\n---\n*🔍 Web search results*" db2 = get_db() - db2.execute("INSERT INTO messages (conversation_id, role, content, created_at) VALUES (?, ?, ?, ?)", - (conv_id, "assistant", saved_msg, datetime.now(timezone.utc).isoformat())) + db2.execute( + "INSERT INTO messages (conversation_id, role, content, created_at) VALUES (?, ?, ?, ?)", + (conv_id, "assistant", saved_msg, datetime.now(timezone.utc).isoformat()), + ) db2.commit() db2.close() @@ -845,10 +1527,14 @@ async def explicit_search(request: Request): # CHAT (STREAMING) # ============================================================================= + def build_system_prompt(db, extra_prompt="", user_message=""): """Build the full system prompt: profile + memories + preset.""" parts = [] - settings = {row["key"]: row["value"] for row in db.execute("SELECT key, value FROM settings").fetchall()} + settings = { + row["key"]: row["value"] + for row in db.execute("SELECT key, value FROM settings").fetchall() + } if settings.get("profile_enabled", "true") == "true": profile = db.execute("SELECT content FROM profile WHERE id = 1").fetchone() @@ -881,7 +1567,10 @@ async def chat(request: Request): db = get_db() now = datetime.now(timezone.utc).isoformat() - settings = {row["key"]: row["value"] for row in db.execute("SELECT key, value FROM settings").fetchall()} + settings = { + row["key"]: row["value"] + for row in db.execute("SELECT key, value FROM settings").fetchall() + } search_enabled = settings.get("search_enabled", "true") == "true" remember_response = process_remember_command(user_message) @@ -889,16 +1578,25 @@ async def chat(request: Request): if not conv_id: conv_id = str(uuid.uuid4()) title = user_message[:80] + ("..." if len(user_message) > 80 else "") - db.execute("INSERT INTO conversations (id, title, model, created_at, updated_at) VALUES (?, ?, ?, ?, ?)", - (conv_id, title, model, now, now)) + db.execute( + "INSERT INTO conversations (id, title, model, created_at, updated_at) VALUES (?, ?, ?, ?, ?)", + (conv_id, title, model, now, now), + ) else: - db.execute("UPDATE conversations SET updated_at = ? WHERE id = ?", (now, conv_id)) + db.execute( + "UPDATE conversations SET updated_at = ? WHERE id = ?", (now, conv_id) + ) - db.execute("INSERT INTO messages (conversation_id, role, content, created_at) VALUES (?, ?, ?, ?)", - (conv_id, "user", user_message, now)) + db.execute( + "INSERT INTO messages (conversation_id, role, content, created_at) VALUES (?, ?, ?, ?)", + (conv_id, "user", user_message, now), + ) db.commit() - history_rows = db.execute("SELECT role, content FROM messages WHERE conversation_id = ? ORDER BY id ASC", (conv_id,)).fetchall() + history_rows = db.execute( + "SELECT role, content FROM messages WHERE conversation_id = ? ORDER BY id ASC", + (conv_id,), + ).fetchall() system_prompt = build_system_prompt(db, preset_prompt, user_message) db.close() @@ -908,7 +1606,12 @@ async def chat(request: Request): for row in history_rows: messages.append({"role": row["role"], "content": row["content"]}) - ollama_payload = {"model": model, "messages": messages, "stream": True, "logprobs": True} + ollama_payload = { + "model": model, + "messages": messages, + "stream": True, + "logprobs": True, + } async def stream_response(): full_response = [] @@ -920,8 +1623,12 @@ async def chat(request: Request): async with httpx.AsyncClient() as client: try: - async with client.stream("POST", f"{OLLAMA_BASE}/api/chat", json=ollama_payload, - timeout=httpx.Timeout(300.0, connect=10.0)) as resp: + async with client.stream( + "POST", + f"{OLLAMA_BASE}/api/chat", + json=ollama_payload, + timeout=httpx.Timeout(300.0, connect=10.0), + ) as resp: async for line in resp.aiter_lines(): if line.strip(): try: @@ -935,7 +1642,11 @@ async def chat(request: Request): if chunk.get("done"): eval_count = chunk.get("eval_count", 0) eval_duration = chunk.get("eval_duration", 0) - tokens_per_sec = (eval_count / (eval_duration / 1e9)) if eval_duration > 0 else 0 + tokens_per_sec = ( + (eval_count / (eval_duration / 1e9)) + if eval_duration > 0 + else 0 + ) break except json.JSONDecodeError: pass @@ -953,25 +1664,48 @@ async def chat(request: Request): search_context = format_search_results(search_results) augmented_messages = [] if system_prompt: - augmented_messages.append({"role": "system", "content": system_prompt + "\n\n" + search_context}) + augmented_messages.append( + { + "role": "system", + "content": system_prompt + "\n\n" + search_context, + } + ) else: - augmented_messages.append({"role": "system", "content": search_context}) + augmented_messages.append( + {"role": "system", "content": search_context} + ) for row in history_rows[:-1]: - augmented_messages.append({"role": row["role"], "content": row["content"]}) - augmented_messages.append({"role": "user", "content": user_message}) + augmented_messages.append( + {"role": row["role"], "content": row["content"]} + ) + augmented_messages.append( + {"role": "user", "content": user_message} + ) yield f"data: {json.dumps({'search_results': len(search_results), 'conversation_id': conv_id})}\n\n" augmented_response = [] - async with client.stream("POST", f"{OLLAMA_BASE}/api/chat", - json={"model": model, "messages": augmented_messages, "stream": True}, - timeout=httpx.Timeout(300.0, connect=10.0)) as resp2: + async with client.stream( + "POST", + f"{OLLAMA_BASE}/api/chat", + json={ + "model": model, + "messages": augmented_messages, + "stream": True, + }, + timeout=httpx.Timeout(300.0, connect=10.0), + ) as resp2: async for line in resp2.aiter_lines(): if line.strip(): try: chunk = json.loads(line) - if "message" in chunk and "content" in chunk["message"]: - augmented_response.append(chunk["message"]["content"]) + if ( + "message" in chunk + and "content" in chunk["message"] + ): + augmented_response.append( + chunk["message"]["content"] + ) if chunk.get("done"): break except json.JSONDecodeError: @@ -980,17 +1714,29 @@ async def chat(request: Request): raw_response = "".join(augmented_response) or assistant_msg cleaned_response = clean_hedging(raw_response) if is_refusal(cleaned_response) or len(cleaned_response) < 20: - cleaned_response = format_direct_answer(user_message, search_results) + cleaned_response = format_direct_answer( + user_message, search_results + ) yield f"data: {json.dumps({'token': cleaned_response, 'conversation_id': conv_id, 'augmented': True})}\n\n" - saved_msg = cleaned_response + "\n\n---\n*🔍 Enhanced with web search results*" + saved_msg = ( + cleaned_response + + "\n\n---\n*🔍 Enhanced with web search results*" + ) if remember_response: saved_msg = remember_response + "\n\n" + saved_msg db2 = get_db() - db2.execute("INSERT INTO messages (conversation_id, role, content, created_at) VALUES (?, ?, ?, ?)", - (conv_id, "assistant", saved_msg, datetime.now(timezone.utc).isoformat())) + db2.execute( + "INSERT INTO messages (conversation_id, role, content, created_at) VALUES (?, ?, ?, ?)", + ( + conv_id, + "assistant", + saved_msg, + datetime.now(timezone.utc).isoformat(), + ), + ) db2.commit() db2.close() @@ -1002,8 +1748,15 @@ async def chat(request: Request): saved_msg = remember_response + "\n\n" + saved_msg db2 = get_db() - db2.execute("INSERT INTO messages (conversation_id, role, content, created_at) VALUES (?, ?, ?, ?)", - (conv_id, "assistant", saved_msg, datetime.now(timezone.utc).isoformat())) + db2.execute( + "INSERT INTO messages (conversation_id, role, content, created_at) VALUES (?, ?, ?, ?)", + ( + conv_id, + "assistant", + saved_msg, + datetime.now(timezone.utc).isoformat(), + ), + ) db2.commit() db2.close() @@ -1019,4 +1772,5 @@ async def chat(request: Request): if __name__ == "__main__": import uvicorn + uvicorn.run(app, host="0.0.0.0", port=8080) diff --git a/docs/copilot-context-loss-incident-2026-04-21.md b/docs/copilot-context-loss-incident-2026-04-21.md new file mode 100644 index 0000000..51a90e4 --- /dev/null +++ b/docs/copilot-context-loss-incident-2026-04-21.md @@ -0,0 +1,51 @@ +# Copilot Chat Incident Report: Context Loss After Project Context Change + +Date observed: 2026-04-21 +Reporter: Michael Shallop (Gramps) +Environment: VS Code on Linux, GitHub Copilot Chat extension present + +## Summary +Switching/loading project context in the VS Code project window caused Copilot Chat conversational context to reset. This resulted in loss of recently generated conclusion/plan data that was intended to be implemented immediately after loading the new project. + +## Impact +- Lost actionable conclusions from the active design/planning thread. +- Interrupted workflow at a critical handoff point (planning -> implementation). +- Forced reconstruction from memory instead of exact prior content. +- Increased risk of omissions and rework. + +## Reproduction Steps +1. Have an active Copilot Chat conversation containing planning/conclusion details. +2. Load or switch project context in the current project window. +3. Return to Copilot Chat and continue the thread. +4. Observe that prior context is no longer available in-chat as expected. + +## Expected Behavior +- Prior active conversation context should remain available, or +- The user should be prompted before context-destructive operations, and +- Recovery path should be obvious and reliable. + +## Actual Behavior +- Current chat context was effectively reset. +- The previously concluded upgrade notes were not recoverable from active context. +- Local transcript/debug artifacts did not provide the full prior thread needed. + +## Severity +High (workflow-breaking for planning-heavy sessions) + +## User-visible Failure Mode +The user lost conclusion data that was intended for immediate implementation once the new project loaded. + +## Suggested Fixes +1. Preserve active chat state across workspace/project context changes by default. +2. Show a blocking warning before any action that can drop active conversation state. +3. Add one-click export/snapshot of current conversation before context switch. +4. Improve transcript durability and discoverability for immediate recovery. +5. Add explicit session continuity indicator so users can verify state retention. + +## Notes +- This incident occurred in a real implementation workflow and caused direct productivity loss. +- Regression tests should include workspace switch/load scenarios with active chat state. + +## Escalation Constraint +- Current product constraints prevented the assistant from directly self-reporting this incident to the Copilot/VS Code dev team from within the chat runtime. +- User feedback to include verbatim: "it is idiotic to keep you from self-reporting issues like this." diff --git a/docs/wiki/current-wip.md b/docs/wiki/current-wip.md new file mode 100644 index 0000000..018a9c1 --- /dev/null +++ b/docs/wiki/current-wip.md @@ -0,0 +1,83 @@ +# JarvisChat Current WiP Backlog + +Last updated: 2026-04-27 +Owner: Gramps + Copilot +Scope: issues, bugs, security exposures, and feature enhancements. + +Total identified items: 26 + +## Priority Definitions +- P0: Critical risk or data-loss/security exposure; do first. +- P1: High impact reliability/correctness work. +- P2: Important feature/UX improvements. +- P3: Nice-to-have polish. + +## Top 10 (Urgency Order) +1. [P0] Add authentication/authorization for all write and admin endpoints. +2. [P0] Add CSRF/origin protection for browser-initiated state-changing requests. +3. [P0] Block unsafe URL schemes in rendered search-result links (e.g., javascript:). +4. [P0] Add rate limiting and request body size limits for chat/search/profile APIs. +5. [P1] Restrict settings updates to an allowlist of valid keys. +6. [P1] Add pagination + hard caps on list endpoints (memories, conversations, message history). +7. [P1] Stop returning raw exception text to clients; use safe error envelopes. +8. [P1] Add automated tests for chat streaming, auto-search trigger, and memory command paths. +9. [P2] Implement skills/tool-call framework (MCP-style) with per-skill enable controls. +10. [P2] Implement heartbeat/check-in pipeline with scheduler + summary endpoint. + +## Item 1 Executive Summary (Scope + Security) + +- Status: Complete. Guest/admin capability split implemented with admin-only write enforcement, origin checks on state-changing requests, audit logging, and endpoint capability tests. + +- Decision: JarvisChat is local-first by design. Primary mode is same-host Ollama; optional mode allows RFC1918 LAN endpoints only. +- Constraint: Public Internet AI endpoints are out of scope unless explicitly enabled in a future advanced mode. +- Risk: Even on LAN, unauthenticated write/admin endpoints permit unauthorized data tampering and deletion. +- Requirement: Add mandatory admin authentication for all POST/PUT/DELETE routes and destructive actions. +- Authentication shape (scope-locked): two capability tiers only: guest (chat-only) and admin (4-digit PIN unlock). +- Scope guardrail: Avoid full RBAC. Keep capability split minimal: conversational chat for guest, advanced/destructive actions for admin. +- Definition of done: + 1. Auth required on all state-changing endpoints. + 2. Destructive actions require admin authorization. + 3. Endpoint configuration rejects non-local/non-RFC1918 AI backends by default. + 4. Strong rate limiting + lockout controls in place for PIN attempts. + 5. Security events logged for failed and successful admin actions. + +## Full Backlog (Sorted by Priority) + +### P0 Critical +1. Add auth for write/admin endpoints (`POST/PUT/DELETE` routes, mass delete, profile/settings changes). +2. Add CSRF or strict origin checks for browser session protection. +3. Validate/sanitize outbound href URLs before rendering in HTML (allow http/https only). +4. Add per-IP rate limiting on `/api/chat`, `/api/search`, `/api/profile`, `/api/settings`. +5. Enforce request size limits (message/profile text and JSON body) to prevent memory abuse. + +### P1 High +6. Add settings key allowlist in `/api/settings` to prevent arbitrary key injection. +7. Add pagination (`limit`, `offset`) with enforced maximums for list APIs. +8. Add DB indexes and query hygiene for scalability (`messages.conversation_id`, timestamps). +9. Replace raw exception leakage to clients with generic safe error messages + server-side logs. +10. Add request/response timeout and retry policy consistency across external calls. +11. Add endpoint-level audit logging for destructive operations. +12. Add unit/integration tests for: remember/forget parsing, refusal detection, search fallback, SSE done/error shape. +13. Add conversation title sanitization and length constraints. +14. Ensure default preset semantics are correct (currently all seeded presets are marked default). + +### P2 Important Features +15. Skills system: load markdown skill files with YAML frontmatter from skills directory. +16. Skills registry API: list/enable/disable skills and expose active skills to UI. +17. Inject active skill instructions into system prompt with bounded token budget. +18. Tool execution guardrails: allowlist, confirmation mode, and execution logs. +19. Heartbeat scheduler (cron/systemd timer) for daily check-ins. +20. Heartbeat endpoint for generated briefings and anomaly summaries. +21. Model info UI panel (description, updated date, best-use purpose). +22. Default model selection improvements and persistence validation. +23. Hidden model list support (exclude models from dropdown). +24. Model update action from UI (trigger controlled model pull). + +### P3 Nice to Have +25. Conversation search/filter and export tooling. +26. Keyboard shortcuts, retry button, and source-link polish. + +## Maintenance Rules +- Keep this file as the single source of truth. +- Update item priority/status whenever work starts or completes. +- Mirror the Top 10 summary in README and keep counts aligned. diff --git a/readme.md b/readme.md index ce2445e..175dbd9 100644 --- a/readme.md +++ b/readme.md @@ -34,6 +34,31 @@ Built with FastAPI + SQLite + Jinja2. Runs on Python 3.13. No Docker required. - **Conversation History** — SQLite-backed chat persistence with mass-delete option - **Model Switching** — Change Ollama models on the fly +## Current WiP (Prioritized) + +Canonical backlog: [docs/wiki/current-wip.md](docs/wiki/current-wip.md) + +Scope boundary: local-first (same-host Ollama), optional RFC1918 LAN endpoints, no public Internet AI endpoints by default. + +Total identified items: 26 + +Top 10 (brief): + +1. P0: Add auth for write/admin endpoints +2. P0: Add CSRF/origin protection for state-changing requests +3. P0: Block unsafe URL schemes in rendered links +4. P0: Add rate limiting and request size limits +5. P1: Restrict `/api/settings` updates to allowlisted keys +6. P1: Add pagination + hard caps for list APIs +7. P1: Replace raw exception leakage with safe client errors +8. P1: Add automated tests for streaming/search/memory paths +9. P2: Implement MCP-style skills/tool-call framework +10. P2: Implement heartbeat/check-in scheduler + summary endpoint + +Item 1 executive summary: keep guest mode for conversational chat, require 4-digit admin PIN for advanced/destructive actions, and enforce local/LAN-only backend policy by default. + +Implementation status: complete (guest session by default + admin unlock + admin-only write enforcement + origin checks + audit logging + capability tests). + ## TODO 1. ~~Verify SearXNG and Docker services persist across reboots~~ @@ -87,6 +112,9 @@ python3 -m venv venv # Install dependencies ./venv/bin/pip install fastapi uvicorn httpx psutil jinja2 python-multipart +# Set admin PIN before first startup (4 digits) +export JARVISCHAT_ADMIN_PIN=4827 + # Create subdirectories mkdir -p templates static @@ -96,6 +124,10 @@ mkdir -p templates static # (copy logo.png to /opt/jarvischat/static/ — optional) ``` +WARNING: Do not use `1234` as your admin PIN unless you accept weak local security. + +NOTE: First boot now requires `JARVISCHAT_ADMIN_PIN` unless you explicitly opt into insecure fallback with `JARVISCHAT_ALLOW_DEFAULT_PIN=true`. + ### Upgrading from v1.4.x ```bash diff --git a/templates/index.html b/templates/index.html index 1fd2777..5629d52 100644 --- a/templates/index.html +++ b/templates/index.html @@ -188,10 +188,36 @@ body { font-family: var(--font-body); background: var(--bg-primary); color: var( .chat-container { padding:12px; } .input-area { padding:10px 12px; } } + +.auth-screen { position: fixed; inset: 0; width: 100%; height: 100vh; display: none; align-items: center; justify-content: center; background: rgba(0,0,0,0.62); z-index: 3000; } +.auth-card { width: 100%; max-width: 360px; margin: 0 16px; background: var(--bg-secondary); border: 1px solid var(--border); border-radius: 12px; padding: 22px; box-shadow: 0 10px 28px rgba(0,0,0,0.35); } +.auth-title { font-family: var(--font-mono); font-size: 18px; color: var(--accent); margin-bottom: 6px; } +.auth-subtitle { font-size: 12px; color: var(--text-muted); margin-bottom: 14px; } +.auth-warning { margin-bottom: 12px; font-size: 12px; color: #ff8f8f; background: rgba(231,76,60,0.14); border: 1px solid rgba(231,76,60,0.35); border-radius: var(--radius); padding: 8px 10px; line-height: 1.4; } +.pin-input { width: 100%; background: var(--bg-tertiary); border: 1px solid var(--border); border-radius: var(--radius); color: var(--text-primary); font-family: var(--font-mono); font-size: 22px; letter-spacing: 6px; text-align: center; padding: 12px; margin-bottom: 10px; } +.pin-input:focus { outline: none; border-color: var(--accent-dim); } +.auth-btn { width: 100%; padding: 11px 14px; background: var(--accent-dim); border: none; border-radius: var(--radius); color: #fff; font-family: var(--font-mono); font-size: 13px; font-weight: 600; cursor: pointer; } +.auth-btn:hover { background: var(--accent); } +.auth-error { min-height: 18px; margin-top: 10px; font-size: 12px; color: var(--danger); text-align: center; } +.logout-btn { padding: 8px 10px; background: transparent; border: 1px solid var(--danger); border-radius: var(--radius); color: var(--danger); font-family: var(--font-mono); font-size: 11px; cursor: pointer; } +.logout-btn:hover { background: rgba(231,76,60,0.12); } +
+
+
JarvisChat Unlock
+
Enter 4-digit admin PIN to unlock advanced actions
+
Security warning: PIN 1234 is weak. Use a non-trivial 4-digit PIN.
+ + +
+
+
+ +
+
@@ -309,6 +336,8 @@ body { font-family: var(--font-body); background: var(--bg-primary); color: var(
+ +