Compare commits

..

2 Commits

5 changed files with 2408 additions and 67 deletions

158
app.py
View File

@@ -56,8 +56,8 @@ syslog_handler.setFormatter(
log.addHandler(syslog_handler) log.addHandler(syslog_handler)
# --- Configuration --- # --- Configuration ---
VERSION = "1.7.8" VERSION = "1.7.6"
OLLAMA_BASE = "http://localhost:11434" OLLAMA_BASE = os.environ.get("OLLAMA_BASE", "http://localhost:11434")
SEARXNG_BASE = "http://localhost:8888" SEARXNG_BASE = "http://localhost:8888"
BASE_DIR = Path(__file__).parent BASE_DIR = Path(__file__).parent
DB_PATH = BASE_DIR / "jarvischat.db" DB_PATH = BASE_DIR / "jarvischat.db"
@@ -1480,14 +1480,24 @@ async def index(request: Request):
return templates.TemplateResponse(request, "index.html", {"version": VERSION}) return templates.TemplateResponse(request, "index.html", {"version": VERSION})
#@app.get("/api/models")
#async def list_models():
# async with httpx.AsyncClient() as client:
# try:
# resp = await client.get(f"{OLLAMA_BASE}/api/tags", timeout=10)
# return resp.json()
# except httpx.ConnectError:
# raise HTTPException(status_code=502, detail="Cannot connect to Ollama.")
@app.get("/api/models") @app.get("/api/models")
async def list_models(): async def list_models():
async with httpx.AsyncClient() as client: async with httpx.AsyncClient() as client:
try: try:
resp = await client.get(f"{OLLAMA_BASE}/api/tags", timeout=10) resp = await client.get(f"{OLLAMA_BASE}/v1/models", timeout=10)
return resp.json() data = resp.json()
models = [{"name": m["id"], "model": m["id"]} for m in data.get("data", [])]
return {"models": models}
except httpx.ConnectError: except httpx.ConnectError:
raise HTTPException(status_code=502, detail="Cannot connect to Ollama.") raise HTTPException(status_code=502, detail="Cannot connect to llama-server.")
@app.get("/api/ps") @app.get("/api/ps")
@@ -1962,16 +1972,12 @@ async def explicit_search(request: Request):
) as resp: ) as resp:
async for line in resp.aiter_lines(): async for line in resp.aiter_lines():
if line.strip(): if line.strip():
try: token, done, _ = parse_llama_stream_chunk(line)
chunk = json.loads(line) if token:
if "message" in chunk and "content" in chunk["message"]: full_response.append(token)
token = chunk["message"]["content"] yield f"data: {json.dumps({'token': token, 'conversation_id': conv_id})}\n\n"
full_response.append(token) if done:
yield f"data: {json.dumps({'token': token, 'conversation_id': conv_id})}\n\n" break
if chunk.get("done"):
break
except json.JSONDecodeError:
pass
except Exception as e: except Exception as e:
incident_key = log_incident( incident_key = log_incident(
"search_summarization_stream", "search_summarization_stream",
@@ -2010,7 +2016,32 @@ async def explicit_search(request: Request):
# ============================================================================= # =============================================================================
def build_system_prompt(db, extra_prompt="", user_message=""):
async def query_rag(query: str, limit: int = 3) -> list[dict]:
"""Query Qdrant for semantically relevant chunks."""
try:
async with httpx.AsyncClient() as client:
embed_resp = await client.post(
"http://192.168.50.108:11434/api/embeddings",
json={"model": "mxbai-embed-large", "prompt": query},
timeout=10.0,
)
if embed_resp.status_code != 200:
return []
vector = embed_resp.json()["embedding"]
search_resp = await client.post(
"http://192.168.50.108:6333/collections/jarvis_rag/points/search",
json={"vector": vector, "limit": limit, "with_payload": True},
timeout=10.0,
)
if search_resp.status_code != 200:
return []
return search_resp.json().get("result", [])
except Exception as e:
log.warning(f"RAG query error: {e}")
return []
async def build_system_prompt(db, extra_prompt="", user_message=""):
"""Build the full system prompt: profile + memories + preset.""" """Build the full system prompt: profile + memories + preset."""
parts = [] parts = []
settings = { settings = {
@@ -2030,6 +2061,17 @@ def build_system_prompt(db, extra_prompt="", user_message=""):
parts.append("## Relevant Context from Memory\n" + "\n".join(memory_lines)) parts.append("## Relevant Context from Memory\n" + "\n".join(memory_lines))
log.debug(f"Injected {len(memories)} memories into context") log.debug(f"Injected {len(memories)} memories into context")
if user_message:
try:
rag_results = await query_rag(user_message)
if rag_results:
rag_lines = [r["payload"]["text"] for r in rag_results if r["score"] > 0.25]
if rag_lines:
parts.append("## Retrieved Context\n" + "\n\n---\n\n".join(rag_lines))
log.warning(f"RAG injected {len(rag_lines)} chunks into context")
except Exception as e:
log.warning(f"RAG injection error: {e}")
if settings.get("skills_enabled", "true") == "true": if settings.get("skills_enabled", "true") == "true":
active_skills = [s for s in list_skills_with_state(db) if s["enabled"]] active_skills = [s for s in list_skills_with_state(db) if s["enabled"]]
if active_skills: if active_skills:
@@ -2041,6 +2083,42 @@ def build_system_prompt(db, extra_prompt="", user_message=""):
return "\n\n---\n\n".join(parts) if parts else "" return "\n\n---\n\n".join(parts) if parts else ""
def parse_llama_stream_chunk(line: str) -> tuple[str | None, bool, dict]:
"""Parse OpenAI-compatible SSE chunk. Returns (token, is_done, stats)."""
if line.startswith("data: "):
line = line[6:]
if line.strip() == "[DONE]":
return None, True, {}
try:
chunk = json.loads(line)
# OpenAI format
choices = chunk.get("choices", [])
if choices:
delta = choices[0].get("delta", {})
token = delta.get("content")
finish = choices[0].get("finish_reason")
stats = {}
if finish == "stop":
usage = chunk.get("usage", {})
stats["tokens_per_sec"] = usage.get("tokens_per_second", 0.0)
return token, finish == "stop", stats
# Ollama format fallback
if "message" in chunk and "content" in chunk["message"]:
token = chunk["message"]["content"]
done = chunk.get("done", False)
stats = {}
if done:
eval_count = chunk.get("eval_count", 0)
eval_duration = chunk.get("eval_duration", 0)
stats["tokens_per_sec"] = (
(eval_count / (eval_duration / 1e9)) if eval_duration > 0 else 0
)
return token, done, stats
except json.JSONDecodeError:
pass
return None, False, {}
@app.post("/api/chat") @app.post("/api/chat")
async def chat(request: Request): async def chat(request: Request):
body = await read_json_body(request, BODY_LIMIT_CHAT_BYTES) body = await read_json_body(request, BODY_LIMIT_CHAT_BYTES)
@@ -2086,7 +2164,7 @@ async def chat(request: Request):
"SELECT role, content FROM messages WHERE conversation_id = ? ORDER BY id ASC", "SELECT role, content FROM messages WHERE conversation_id = ? ORDER BY id ASC",
(conv_id,), (conv_id,),
).fetchall() ).fetchall()
system_prompt = build_system_prompt(db, preset_prompt, user_message) system_prompt = await build_system_prompt(db, preset_prompt, user_message)
db.close() db.close()
messages = [] messages = []
@@ -2099,7 +2177,6 @@ async def chat(request: Request):
"model": model, "model": model,
"messages": messages, "messages": messages,
"stream": True, "stream": True,
"logprobs": True,
} }
async def stream_response(): async def stream_response():
@@ -2120,25 +2197,12 @@ async def chat(request: Request):
) as resp: ) as resp:
async for line in resp.aiter_lines(): async for line in resp.aiter_lines():
if line.strip(): if line.strip():
try: token, done, stats = parse_llama_stream_chunk(line)
chunk = json.loads(line) if token:
if "message" in chunk and "content" in chunk["message"]: full_response.append(token)
token = chunk["message"]["content"] yield f"data: {json.dumps({'token': token, 'conversation_id': conv_id})}\n\n"
full_response.append(token) if done:
yield f"data: {json.dumps({'token': token, 'conversation_id': conv_id})}\n\n" tokens_per_sec = stats.get("tokens_per_sec", 0.0)
if "logprobs" in chunk and chunk["logprobs"]:
all_logprobs.extend(chunk["logprobs"])
if chunk.get("done"):
eval_count = chunk.get("eval_count", 0)
eval_duration = chunk.get("eval_duration", 0)
tokens_per_sec = (
(eval_count / (eval_duration / 1e9))
if eval_duration > 0
else 0
)
break
except json.JSONDecodeError:
pass
assistant_msg = "".join(full_response) assistant_msg = "".join(full_response)
perplexity = calculate_perplexity(all_logprobs) if all_logprobs else 0.0 perplexity = calculate_perplexity(all_logprobs) if all_logprobs else 0.0
@@ -2186,19 +2250,11 @@ async def chat(request: Request):
) as resp2: ) as resp2:
async for line in resp2.aiter_lines(): async for line in resp2.aiter_lines():
if line.strip(): if line.strip():
try: token2, done2, _ = parse_llama_stream_chunk(line)
chunk = json.loads(line) if token2:
if ( augmented_response.append(token2)
"message" in chunk if done2:
and "content" in chunk["message"] break
):
augmented_response.append(
chunk["message"]["content"]
)
if chunk.get("done"):
break
except json.JSONDecodeError:
pass
raw_response = "".join(augmented_response) or assistant_msg raw_response = "".join(augmented_response) or assistant_msg
cleaned_response = clean_hedging(raw_response) cleaned_response = clean_hedging(raw_response)
@@ -2251,6 +2307,8 @@ async def chat(request: Request):
yield f"data: {json.dumps({'done': True, 'conversation_id': conv_id, 'perplexity': round(perplexity, 2), 'tokens_per_sec': round(tokens_per_sec, 1)})}\n\n" yield f"data: {json.dumps({'done': True, 'conversation_id': conv_id, 'perplexity': round(perplexity, 2), 'tokens_per_sec': round(tokens_per_sec, 1)})}\n\n"
except httpx.RemoteProtocolError:
pass # llama-server closes connection after [DONE] — normal
except httpx.ConnectError: except httpx.ConnectError:
yield f"data: {json.dumps({'error': 'Cannot connect to Ollama. Is it running?'})}\n\n" yield f"data: {json.dumps({'error': 'Cannot connect to Ollama. Is it running?'})}\n\n"
except Exception as e: except Exception as e:

2285
app.py.bak Normal file

File diff suppressed because it is too large Load Diff

Binary file not shown.

Before

Width:  |  Height:  |  Size: 219 KiB

After

Width:  |  Height:  |  Size: 322 KiB

View File

@@ -4,7 +4,7 @@ Last updated: 2026-04-27
Owner: Gramps + Copilot Owner: Gramps + Copilot
Scope: issues, bugs, security exposures, and feature enhancements. Scope: issues, bugs, security exposures, and feature enhancements.
Total identified items: 27 Total identified items: 26
## Priority Definitions ## Priority Definitions
- P0: Critical risk or data-loss/security exposure; do first. - P0: Critical risk or data-loss/security exposure; do first.
@@ -60,23 +60,22 @@ Total identified items: 27
12. Add unit/integration tests for: remember/forget parsing, refusal detection, search fallback, SSE done/error shape. 12. Add unit/integration tests for: remember/forget parsing, refusal detection, search fallback, SSE done/error shape.
13. Add conversation title sanitization and length constraints. 13. Add conversation title sanitization and length constraints.
14. Ensure default preset semantics are correct (currently all seeded presets are marked default). 14. Ensure default preset semantics are correct (currently all seeded presets are marked default).
15. Add preflight validation for required model/preset selection and block send with clear user guidance instead of timing out.
### P2 Important Features ### P2 Important Features
16. Skills system: load markdown skill files with YAML frontmatter from skills directory. 15. Skills system: load markdown skill files with YAML frontmatter from skills directory.
17. Skills registry API: list/enable/disable skills and expose active skills to UI. 16. Skills registry API: list/enable/disable skills and expose active skills to UI.
18. Inject active skill instructions into system prompt with bounded token budget. 17. Inject active skill instructions into system prompt with bounded token budget.
19. Tool execution guardrails: allowlist, confirmation mode, and execution logs. 18. Tool execution guardrails: allowlist, confirmation mode, and execution logs.
20. Heartbeat scheduler (cron/systemd timer) for daily check-ins. 19. Heartbeat scheduler (cron/systemd timer) for daily check-ins.
21. Heartbeat endpoint for generated briefings and anomaly summaries. 20. Heartbeat endpoint for generated briefings and anomaly summaries.
22. Model info UI panel (description, updated date, best-use purpose). 21. Model info UI panel (description, updated date, best-use purpose).
23. Default model selection improvements and persistence validation. 22. Default model selection improvements and persistence validation.
24. Hidden model list support (exclude models from dropdown). 23. Hidden model list support (exclude models from dropdown).
25. Model update action from UI (trigger controlled model pull). 24. Model update action from UI (trigger controlled model pull).
### P3 Nice to Have ### P3 Nice to Have
26. Conversation search/filter and export tooling. 25. Conversation search/filter and export tooling.
27. Keyboard shortcuts, retry button, and source-link polish. 26. Keyboard shortcuts, retry button, and source-link polish.
## Maintenance Rules ## Maintenance Rules
- Keep this file as the single source of truth. - Keep this file as the single source of truth.

View File

@@ -1,4 +1,4 @@
# ⚡ JarvisChat v1.7.8 # ⚡ JarvisChat v1.7.6
![screenshot](docs/images/screenshot.png) ![screenshot](docs/images/screenshot.png)
@@ -74,7 +74,7 @@ Canonical backlog: [docs/wiki/current-wip.md](docs/wiki/current-wip.md)
Scope boundary: local-first (same-host Ollama), optional RFC1918 LAN endpoints, no public Internet AI endpoints by default. Scope boundary: local-first (same-host Ollama), optional RFC1918 LAN endpoints, no public Internet AI endpoints by default.
Total identified items: 27 Total identified items: 26
Top 10 (brief): Top 10 (brief):
@@ -113,7 +113,6 @@ Implementation status: complete (guest session by default + admin unlock + admin
16. Hide/remove model from list — exclude models from dropdown 16. Hide/remove model from list — exclude models from dropdown
17. Update model function — trigger `ollama pull` for selected model from UI 17. Update model function — trigger `ollama pull` for selected model from UI
18. Add mouseover tooltip to SEND button 18. Add mouseover tooltip to SEND button
19. Add preflight validation for required model/preset selection and show a clear warning before send to prevent avoidable timeout loops
## File Structure ## File Structure