fix: resolve all critical runtime errors and bugs from audit

- Add COMPLETIONS_API_KEY to config.py (env var + auto-generated fallback)
- Fix perplexity auto-search: request logprobs=true from upstream, extract per-token
  logprobs in parse_llama_stream_chunk, and populate all_logprobs during streaming
- Fix all /api/models endpoints to target LLAMA_SERVER_BASE (port 8081) not OLLAMA_BASE
- Fix RAG embedding endpoint URL from port 11434 (Ollama) to 8081 (llama-server)
- Correct misleading error messages to say 'inference server' instead of 'Ollama'
- Remove raw_results leak from SSE event stream in /api/search
- Fix weather query extractor: pattern-match instead of unconditional suffix append
- Escape FTS5 operator keywords (AND/OR/NOT/NEAR) in memory search
- Move auth.py BODY_LIMIT_DEFAULT_BYTES imports to module level
- Change RAG injection log level from warning to info
- Fix all 8 test files after modular refactor (rewire imports from correct modules)
- Update AGENTS.md and README.md to reflect v1.8.0 changes
This commit is contained in:
gramps
2026-06-27 15:12:18 -07:00
parent 41a8708c0d
commit cc1efa7a21
20 changed files with 457 additions and 896 deletions

4
rag.py
View File

@@ -12,7 +12,7 @@ from config import MAX_SKILL_PROMPT_CHARS
log = logging.getLogger("jarvischat")
QDRANT_URL = "http://192.168.50.108:6333"
-EMBED_URL = "http://192.168.50.108:11434"
+EMBED_URL = "http://192.168.50.108:8081"
EMBED_MODEL = "mxbai-embed-large"
RAG_COLLECTION = "jarvis_rag"
RAG_SCORE_THRESHOLD = 0.25
@@ -65,7 +65,7 @@ async def build_system_prompt(db, extra_prompt: str = "", user_message: str = ""
rag_lines = [r["payload"]["text"] for r in rag_results if r["score"] > RAG_SCORE_THRESHOLD]
if rag_lines:
parts.append("## Retrieved Context\n" + "\n\n---\n\n".join(rag_lines))
-log.warning(f"RAG injected {len(rag_lines)} chunks into context")
+log.info(f"RAG injected {len(rag_lines)} chunks into context")
except Exception as e:
log.warning(f"RAG injection error: {e}")