From 66b086c3f3c0889bd77ef5f0b6d4015273dd886f Mon Sep 17 00:00:00 2001 From: gramps Date: Sat, 27 Jun 2026 16:03:19 -0700 Subject: [PATCH] fix: restore EMBED_URL pointing to ollama on 192.168.50.210:11434 --- AGENTS.md | 2 +- README.md | 2 +- rag.py | 5 +++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index db213a7..0fee6ca 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -102,7 +102,7 @@ The upstream request includes `"logprobs": true`. `parse_llama_stream_chunk()` e - `ALLOWED_SETTINGS_KEYS` in `config.py` controls which keys the UI can write via `/api/settings` - Settings table seeded with defaults (`profile_enabled`, `search_enabled`, `memory_enabled`, `skills_enabled`, `default_model`) — never overwritten by `init_db()` - Profile table uses singleton row `id=1` -- RAG embedding requests go to `LLAMA_SERVER_BASE` at `/api/embeddings` +- RAG embedding requests go to `EMBED_URL` at `/api/embeddings` (separate Ollama instance) ### SSE Protocol diff --git a/README.md b/README.md index 6e1a959..dad0b20 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ Developer wiki: [docs/wiki/Home.md](docs/wiki/Home.md) - **`COMPLETIONS_API_KEY`** — auto-generated secret key for the OpenAI-compatible endpoint, overridable via `JARVISCHAT_COMPLETIONS_API_KEY` env var - **Perplexity auto-search fixed** — upstream request now sends `"logprobs": true`, `parse_llama_stream_chunk()` extracts per-token logprobs, so `calculate_perplexity()` and `is_uncertain()` work correctly (was dead code) - **All `/api/models` endpoints** — now correctly target `LLAMA_SERVER_BASE` (llama-server on port 8081) instead of the old Ollama port; `/api/ps` uses `/v1/models` endpoint -- **RAG embedding endpoint fixed** — hardcoded `EMBED_URL` replaced with `LLAMA_SERVER_BASE` from config, respecting the `JARVISCHAT_LLAMA_SERVER_BASE` env var +- **RAG embedding endpoint fixed** — `EMBED_URL` changed from old server `:8081` to correct host/port `http://192.168.50.210:11434` (Ollama on new machine) - **Error messages corrected** — all user-facing errors say "inference server" instead of "Ollama" or "llama-server" - **Secure SSE protocol** — raw search results are no longer leaked in the SSE event stream - **FTS5 query safety** — operator keywords (`AND`, `OR`, `NOT`, `NEAR`) are double-quoted to prevent parse errors diff --git a/rag.py b/rag.py index f090f8d..e0554c9 100644 --- a/rag.py +++ b/rag.py @@ -7,11 +7,12 @@ import httpx from db import get_db, get_setting, list_skills_with_state, format_active_skills_prompt from memory import search_memories -from config import LLAMA_SERVER_BASE, MAX_SKILL_PROMPT_CHARS +from config import MAX_SKILL_PROMPT_CHARS log = logging.getLogger("jarvischat") QDRANT_URL = "http://192.168.50.108:6333" +EMBED_URL = "http://192.168.50.210:11434" EMBED_MODEL = "mxbai-embed-large" RAG_COLLECTION = "jarvis_rag" RAG_SCORE_THRESHOLD = 0.25 @@ -21,7 +22,7 @@ async def query_rag(query: str, limit: int = 3) -> list: try: async with httpx.AsyncClient() as client: embed_resp = await client.post( - f"{LLAMA_SERVER_BASE}/api/embeddings", + f"{EMBED_URL}/api/embeddings", json={"model": EMBED_MODEL, "prompt": query}, timeout=10.0, )