fix: resolve all critical runtime errors and bugs from audit

- Add COMPLETIONS_API_KEY to config.py (env var + auto-generated fallback) - Fix perplexity auto-search: upstream sends logprobs=true, parse_llama_stream_chunk extracts per-token logprobs, all_logprobs populated during streaming - Fix all /api/models endpoints to target LLAMA_SERVER_BASE (port 8081) not OLLAMA_BASE - Fix RAG embedding endpoint URL from port 11434 (Ollama) to 8081 (llama-server) - Correct misleading error messages: 'inference server' not 'Ollama' - Remove raw_results leak from SSE event stream in /api/search - Fix weather query extractor: pattern-match instead of unconditional suffix append - Escape FTS5 operator keywords (AND/OR/NOT/NEAR) in memory search - Move auth.py BODY_LIMIT_DEFAULT_BYTES imports to module level - Change RAG injection log level from warning to info - Fix all 8 test files after modular refactor (rewire imports from correct modules) - Update AGENTS.md and README.md to reflect v1.8.0 changes
2026-06-27 15:12:18 -07:00
parent 41a8708c0d
commit cc1efa7a21
20 changed files with 457 additions and 896 deletions
--- a/routers/search_route.py
+++ b/routers/search_route.py
@@ -35,14 +35,14 @@ async def explicit_search(request: Request):

    if not conv_id:
        conv_id = str(uuid.uuid4())
-        title = f"🔍 {query[:70]}..." if len(query) > 70 else f"🔍 {query}"
+        title = query[:70] + "..." if len(query) > 70 else query
        db.execute("INSERT INTO conversations (id, title, model, created_at, updated_at) VALUES (?, ?, ?, ?, ?)",
                   (conv_id, title, model, now, now))
    else:
        db.execute("UPDATE conversations SET updated_at = ? WHERE id = ?", (now, conv_id))

    db.execute("INSERT INTO messages (conversation_id, role, content, created_at) VALUES (?, ?, ?, ?)",
-               (conv_id, "user", f"🔍 {query}", now))
+               (conv_id, "user", query, now))
    db.commit()
    db.close()

@@ -80,7 +80,7 @@ async def explicit_search(request: Request):
                ) as resp:
                    async for line in resp.aiter_lines():
                        if line.strip():
-                            token, done, _ = parse_llama_stream_chunk(line)
+                            token, done, _, _ = parse_llama_stream_chunk(line)
                            if token:
                                full_response.append(token)
                                yield f"data: {json.dumps({'token': token, 'conversation_id': conv_id})}\n\n"
@@ -102,7 +102,6 @@ async def explicit_search(request: Request):
        db2.commit()
        db2.close()

-        yield f"data: {json.dumps({'raw_results': results, 'conversation_id': conv_id})}\n\n"
        yield f"data: {json.dumps({'done': True, 'conversation_id': conv_id, 'searched': True})}\n\n"

    return StreamingResponse(stream_search(), media_type="text/event-stream")