Files
jarvisChat/routers/search_route.py
gramps d01dd3b761 refactor(arch): modular package structure — split monolithic app.py into config/db/auth/memory/search/rag/gpu + routers/
- config.py: all constants, env vars, limits, skill registry, profiles
- db.py: schema init, connection factory, skill state helpers
- security.py: PIN hashing, audit logging, rate limiting, CSRF, request helpers
- auth.py: session management, PIN verify, auth routes
- memory.py: FTS5 CRUD + remember/forget command processing
- search.py: SearXNG integration, perplexity scoring, refusal/hedge detection
- gpu.py: rocm-smi stats
- rag.py: Qdrant vector search + system prompt assembly
- routers/: conversations, memories, models, presets, profile, settings, skills, chat, search
- app.py: slim entry point, middleware, router registration only

Bumps to v1.9.0
2026-06-16 08:17:46 -07:00

109 lines
4.7 KiB
Python

"""JarvisChat routers - /api/search explicit search endpoint."""
import json
import logging
import uuid
from datetime import datetime, timezone
import httpx
from fastapi import APIRouter, HTTPException, Request
from fastapi.responses import StreamingResponse
from config import DEFAULT_MODEL, LLAMA_SERVER_BASE, MAX_SEARCH_QUERY_CHARS
from db import get_db
from search import query_searxng, format_search_results
from routers.chat import parse_llama_stream_chunk
from security import read_json_body, log_incident, BODY_LIMIT_CHAT_BYTES
log = logging.getLogger("jarvischat")
router = APIRouter()
@router.post("/api/search")
async def explicit_search(request: Request):
body = await read_json_body(request, BODY_LIMIT_CHAT_BYTES)
query = body.get("query", "").strip()
if len(query) > MAX_SEARCH_QUERY_CHARS:
raise HTTPException(status_code=413, detail="Search query is too long")
conv_id = body.get("conversation_id")
model = body.get("model", DEFAULT_MODEL)
if not query:
raise HTTPException(status_code=400, detail="Empty query")
db = get_db()
now = datetime.now(timezone.utc).isoformat()
if not conv_id:
conv_id = str(uuid.uuid4())
title = f"🔍 {query[:70]}..." if len(query) > 70 else f"🔍 {query}"
db.execute("INSERT INTO conversations (id, title, model, created_at, updated_at) VALUES (?, ?, ?, ?, ?)",
(conv_id, title, model, now, now))
else:
db.execute("UPDATE conversations SET updated_at = ? WHERE id = ?", (now, conv_id))
db.execute("INSERT INTO messages (conversation_id, role, content, created_at) VALUES (?, ?, ?, ?)",
(conv_id, "user", f"🔍 {query}", now))
db.commit()
db.close()
async def stream_search():
yield f"data: {json.dumps({'conversation_id': conv_id, 'searching': True})}\n\n"
results = await query_searxng(query, max_results=5)
if not results:
error_msg = "No search results found."
yield f"data: {json.dumps({'token': error_msg, 'conversation_id': conv_id})}\n\n"
db2 = get_db()
db2.execute("INSERT INTO messages (conversation_id, role, content, created_at) VALUES (?, ?, ?, ?)",
(conv_id, "assistant", error_msg, datetime.now(timezone.utc).isoformat()))
db2.commit()
db2.close()
yield f"data: {json.dumps({'done': True, 'conversation_id': conv_id})}\n\n"
return
yield f"data: {json.dumps({'search_results': len(results), 'conversation_id': conv_id})}\n\n"
search_context = format_search_results(results)
messages = [
{"role": "system", "content": f"You have access to current web data. Answer directly using ONLY the data below. Be concise. No apologies. No disclaimers.\n\n{search_context}"},
{"role": "user", "content": query},
]
full_response = []
async with httpx.AsyncClient() as client:
try:
async with client.stream(
"POST", f"{LLAMA_SERVER_BASE}/v1/chat/completions",
json={"model": model, "messages": messages, "stream": True},
timeout=httpx.Timeout(300.0, connect=10.0),
) as resp:
async for line in resp.aiter_lines():
if line.strip():
token, done, _ = parse_llama_stream_chunk(line)
if token:
full_response.append(token)
yield f"data: {json.dumps({'token': token, 'conversation_id': conv_id})}\n\n"
if done:
break
except Exception as e:
incident_key = log_incident("search_summarization_stream",
message="Stream failure during explicit search summarization",
request=request, exc=e)
yield f"data: {json.dumps({'error': 'Search summarization could not complete right now.', 'error_key': incident_key})}\n\n"
return
summary = "".join(full_response)
saved_msg = f"{summary}\n\n---\n*🔍 Web search results*"
db2 = get_db()
db2.execute("INSERT INTO messages (conversation_id, role, content, created_at) VALUES (?, ?, ?, ?)",
(conv_id, "assistant", saved_msg, datetime.now(timezone.utc).isoformat()))
db2.commit()
db2.close()
yield f"data: {json.dumps({'raw_results': results, 'conversation_id': conv_id})}\n\n"
yield f"data: {json.dumps({'done': True, 'conversation_id': conv_id, 'searched': True})}\n\n"
return StreamingResponse(stream_search(), media_type="text/event-stream")