- Add COMPLETIONS_API_KEY to config.py (env var + auto-generated fallback) - Fix perplexity auto-search: upstream sends logprobs=true, parse_llama_stream_chunk extracts per-token logprobs, all_logprobs populated during streaming - Fix all /api/models endpoints to target LLAMA_SERVER_BASE (port 8081) not OLLAMA_BASE - Fix RAG embedding endpoint URL from port 11434 (Ollama) to 8081 (llama-server) - Correct misleading error messages: 'inference server' not 'Ollama' - Remove raw_results leak from SSE event stream in /api/search - Fix weather query extractor: pattern-match instead of unconditional suffix append - Escape FTS5 operator keywords (AND/OR/NOT/NEAR) in memory search - Move auth.py BODY_LIMIT_DEFAULT_BYTES imports to module level - Change RAG injection log level from warning to info - Fix all 8 test files after modular refactor (rewire imports from correct modules) - Update AGENTS.md and README.md to reflect v1.8.0 changes
81 lines
3.1 KiB
Python
81 lines
3.1 KiB
Python
"""
|
|
JarvisChat - RAG pipeline: Qdrant vector search + system prompt assembly.
|
|
"""
|
|
import logging
|
|
|
|
import httpx
|
|
|
|
from db import get_db, get_setting, list_skills_with_state, format_active_skills_prompt
|
|
from memory import search_memories
|
|
from config import MAX_SKILL_PROMPT_CHARS
|
|
|
|
log = logging.getLogger("jarvischat")
|
|
|
|
QDRANT_URL = "http://192.168.50.108:6333"
|
|
EMBED_URL = "http://192.168.50.108:8081"
|
|
EMBED_MODEL = "mxbai-embed-large"
|
|
RAG_COLLECTION = "jarvis_rag"
|
|
RAG_SCORE_THRESHOLD = 0.25
|
|
|
|
|
|
async def query_rag(query: str, limit: int = 3) -> list:
|
|
try:
|
|
async with httpx.AsyncClient() as client:
|
|
embed_resp = await client.post(
|
|
f"{EMBED_URL}/api/embeddings",
|
|
json={"model": EMBED_MODEL, "prompt": query},
|
|
timeout=10.0,
|
|
)
|
|
if embed_resp.status_code != 200:
|
|
return []
|
|
vector = embed_resp.json()["embedding"]
|
|
search_resp = await client.post(
|
|
f"{QDRANT_URL}/collections/{RAG_COLLECTION}/points/search",
|
|
json={"vector": vector, "limit": limit, "with_payload": True},
|
|
timeout=10.0,
|
|
)
|
|
if search_resp.status_code != 200:
|
|
return []
|
|
return search_resp.json().get("result", [])
|
|
except Exception as e:
|
|
log.warning(f"RAG query error: {e}")
|
|
return []
|
|
|
|
|
|
async def build_system_prompt(db, extra_prompt: str = "", user_message: str = "") -> str:
|
|
parts = []
|
|
settings = {row["key"]: row["value"] for row in db.execute("SELECT key, value FROM settings").fetchall()}
|
|
|
|
if settings.get("profile_enabled", "true") == "true":
|
|
profile = db.execute("SELECT content FROM profile WHERE id = 1").fetchone()
|
|
if profile and profile["content"].strip():
|
|
parts.append(profile["content"].strip())
|
|
|
|
if settings.get("memory_enabled", "true") == "true" and user_message:
|
|
memories = search_memories(user_message, limit=5)
|
|
if memories:
|
|
memory_lines = [f"- {m['fact']}" for m in memories]
|
|
parts.append("## Relevant Context from Memory\n" + "\n".join(memory_lines))
|
|
log.debug(f"Injected {len(memories)} memories into context")
|
|
|
|
if user_message:
|
|
try:
|
|
rag_results = await query_rag(user_message)
|
|
if rag_results:
|
|
rag_lines = [r["payload"]["text"] for r in rag_results if r["score"] > RAG_SCORE_THRESHOLD]
|
|
if rag_lines:
|
|
parts.append("## Retrieved Context\n" + "\n\n---\n\n".join(rag_lines))
|
|
log.info(f"RAG injected {len(rag_lines)} chunks into context")
|
|
except Exception as e:
|
|
log.warning(f"RAG injection error: {e}")
|
|
|
|
if settings.get("skills_enabled", "true") == "true":
|
|
active_skills = [s for s in list_skills_with_state(db) if s["enabled"]]
|
|
if active_skills:
|
|
parts.append(format_active_skills_prompt(active_skills))
|
|
|
|
if extra_prompt and extra_prompt.strip():
|
|
parts.append(extra_prompt.strip())
|
|
|
|
return "\n\n---\n\n".join(parts) if parts else ""
|