Compare commits
5 Commits
v1.8.0
...
dd475a6f2d
| Author | SHA1 | Date | |
|---|---|---|---|
| dd475a6f2d | |||
| 6de3a1e154 | |||
| 5a652c1b74 | |||
| 18bca027de | |||
| 36bca94840 |
167
app.py
167
app.py
@@ -56,8 +56,9 @@ syslog_handler.setFormatter(
|
|||||||
log.addHandler(syslog_handler)
|
log.addHandler(syslog_handler)
|
||||||
|
|
||||||
# --- Configuration ---
|
# --- Configuration ---
|
||||||
VERSION = "1.7.6"
|
VERSION = "v1.8.0"
|
||||||
OLLAMA_BASE = "http://localhost:11434"
|
OLLAMA_BASE = os.environ.get("OLLAMA_BASE", "http://localhost:11434")
|
||||||
|
LLAMA_SERVER_BASE = os.environ.get("LLAMA_SERVER_BASE", "http://192.168.50.108:8081")
|
||||||
SEARXNG_BASE = "http://localhost:8888"
|
SEARXNG_BASE = "http://localhost:8888"
|
||||||
BASE_DIR = Path(__file__).parent
|
BASE_DIR = Path(__file__).parent
|
||||||
DB_PATH = BASE_DIR / "jarvischat.db"
|
DB_PATH = BASE_DIR / "jarvischat.db"
|
||||||
@@ -1038,7 +1039,7 @@ def get_gpu_stats() -> dict:
|
|||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def lifespan(app: FastAPI):
|
async def lifespan(app: FastAPI):
|
||||||
log.info(f"JarvisChat v{VERSION} starting up")
|
log.info(f"JarvisChat v{VERSION} starting up")
|
||||||
log.info(f"Ollama: {OLLAMA_BASE}, SearXNG: {SEARXNG_BASE}")
|
log.info(f"Ollama: {OLLAMA_BASE}, llama-server: {LLAMA_SERVER_BASE}, SearXNG: {SEARXNG_BASE}")
|
||||||
init_db()
|
init_db()
|
||||||
log.info(f"Memory system: {get_memory_count()} memories loaded")
|
log.info(f"Memory system: {get_memory_count()} memories loaded")
|
||||||
yield
|
yield
|
||||||
@@ -1480,14 +1481,24 @@ async def index(request: Request):
|
|||||||
return templates.TemplateResponse(request, "index.html", {"version": VERSION})
|
return templates.TemplateResponse(request, "index.html", {"version": VERSION})
|
||||||
|
|
||||||
|
|
||||||
|
#@app.get("/api/models")
|
||||||
|
#async def list_models():
|
||||||
|
# async with httpx.AsyncClient() as client:
|
||||||
|
# try:
|
||||||
|
# resp = await client.get(f"{OLLAMA_BASE}/api/tags", timeout=10)
|
||||||
|
# return resp.json()
|
||||||
|
# except httpx.ConnectError:
|
||||||
|
# raise HTTPException(status_code=502, detail="Cannot connect to Ollama.")
|
||||||
@app.get("/api/models")
|
@app.get("/api/models")
|
||||||
async def list_models():
|
async def list_models():
|
||||||
async with httpx.AsyncClient() as client:
|
async with httpx.AsyncClient() as client:
|
||||||
try:
|
try:
|
||||||
resp = await client.get(f"{OLLAMA_BASE}/api/tags", timeout=10)
|
resp = await client.get(f"{OLLAMA_BASE}/v1/models", timeout=10)
|
||||||
return resp.json()
|
data = resp.json()
|
||||||
|
models = [{"name": m["id"], "model": m["id"]} for m in data.get("data", [])]
|
||||||
|
return {"models": models}
|
||||||
except httpx.ConnectError:
|
except httpx.ConnectError:
|
||||||
raise HTTPException(status_code=502, detail="Cannot connect to Ollama.")
|
raise HTTPException(status_code=502, detail="Cannot connect to llama-server.")
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/ps")
|
@app.get("/api/ps")
|
||||||
@@ -1956,22 +1967,18 @@ async def explicit_search(request: Request):
|
|||||||
try:
|
try:
|
||||||
async with client.stream(
|
async with client.stream(
|
||||||
"POST",
|
"POST",
|
||||||
f"{OLLAMA_BASE}/api/chat",
|
f"{LLAMA_SERVER_BASE}/v1/chat/completions",
|
||||||
json={"model": model, "messages": messages, "stream": True},
|
json={"model": model, "messages": messages, "stream": True},
|
||||||
timeout=httpx.Timeout(300.0, connect=10.0),
|
timeout=httpx.Timeout(300.0, connect=10.0),
|
||||||
) as resp:
|
) as resp:
|
||||||
async for line in resp.aiter_lines():
|
async for line in resp.aiter_lines():
|
||||||
if line.strip():
|
if line.strip():
|
||||||
try:
|
token, done, _ = parse_llama_stream_chunk(line)
|
||||||
chunk = json.loads(line)
|
if token:
|
||||||
if "message" in chunk and "content" in chunk["message"]:
|
full_response.append(token)
|
||||||
token = chunk["message"]["content"]
|
yield f"data: {json.dumps({'token': token, 'conversation_id': conv_id})}\n\n"
|
||||||
full_response.append(token)
|
if done:
|
||||||
yield f"data: {json.dumps({'token': token, 'conversation_id': conv_id})}\n\n"
|
break
|
||||||
if chunk.get("done"):
|
|
||||||
break
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
pass
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
incident_key = log_incident(
|
incident_key = log_incident(
|
||||||
"search_summarization_stream",
|
"search_summarization_stream",
|
||||||
@@ -2010,7 +2017,32 @@ async def explicit_search(request: Request):
|
|||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
def build_system_prompt(db, extra_prompt="", user_message=""):
|
|
||||||
|
async def query_rag(query: str, limit: int = 3) -> list[dict]:
|
||||||
|
"""Query Qdrant for semantically relevant chunks."""
|
||||||
|
try:
|
||||||
|
async with httpx.AsyncClient() as client:
|
||||||
|
embed_resp = await client.post(
|
||||||
|
"http://192.168.50.108:11434/api/embeddings",
|
||||||
|
json={"model": "mxbai-embed-large", "prompt": query},
|
||||||
|
timeout=10.0,
|
||||||
|
)
|
||||||
|
if embed_resp.status_code != 200:
|
||||||
|
return []
|
||||||
|
vector = embed_resp.json()["embedding"]
|
||||||
|
search_resp = await client.post(
|
||||||
|
"http://192.168.50.108:6333/collections/jarvis_rag/points/search",
|
||||||
|
json={"vector": vector, "limit": limit, "with_payload": True},
|
||||||
|
timeout=10.0,
|
||||||
|
)
|
||||||
|
if search_resp.status_code != 200:
|
||||||
|
return []
|
||||||
|
return search_resp.json().get("result", [])
|
||||||
|
except Exception as e:
|
||||||
|
log.warning(f"RAG query error: {e}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
async def build_system_prompt(db, extra_prompt="", user_message=""):
|
||||||
"""Build the full system prompt: profile + memories + preset."""
|
"""Build the full system prompt: profile + memories + preset."""
|
||||||
parts = []
|
parts = []
|
||||||
settings = {
|
settings = {
|
||||||
@@ -2030,6 +2062,17 @@ def build_system_prompt(db, extra_prompt="", user_message=""):
|
|||||||
parts.append("## Relevant Context from Memory\n" + "\n".join(memory_lines))
|
parts.append("## Relevant Context from Memory\n" + "\n".join(memory_lines))
|
||||||
log.debug(f"Injected {len(memories)} memories into context")
|
log.debug(f"Injected {len(memories)} memories into context")
|
||||||
|
|
||||||
|
if user_message:
|
||||||
|
try:
|
||||||
|
rag_results = await query_rag(user_message)
|
||||||
|
if rag_results:
|
||||||
|
rag_lines = [r["payload"]["text"] for r in rag_results if r["score"] > 0.25]
|
||||||
|
if rag_lines:
|
||||||
|
parts.append("## Retrieved Context\n" + "\n\n---\n\n".join(rag_lines))
|
||||||
|
log.warning(f"RAG injected {len(rag_lines)} chunks into context")
|
||||||
|
except Exception as e:
|
||||||
|
log.warning(f"RAG injection error: {e}")
|
||||||
|
|
||||||
if settings.get("skills_enabled", "true") == "true":
|
if settings.get("skills_enabled", "true") == "true":
|
||||||
active_skills = [s for s in list_skills_with_state(db) if s["enabled"]]
|
active_skills = [s for s in list_skills_with_state(db) if s["enabled"]]
|
||||||
if active_skills:
|
if active_skills:
|
||||||
@@ -2041,6 +2084,42 @@ def build_system_prompt(db, extra_prompt="", user_message=""):
|
|||||||
return "\n\n---\n\n".join(parts) if parts else ""
|
return "\n\n---\n\n".join(parts) if parts else ""
|
||||||
|
|
||||||
|
|
||||||
|
def parse_llama_stream_chunk(line: str) -> tuple[str | None, bool, dict]:
|
||||||
|
"""Parse OpenAI-compatible SSE chunk. Returns (token, is_done, stats)."""
|
||||||
|
if line.startswith("data: "):
|
||||||
|
line = line[6:]
|
||||||
|
if line.strip() == "[DONE]":
|
||||||
|
return None, True, {}
|
||||||
|
try:
|
||||||
|
chunk = json.loads(line)
|
||||||
|
# OpenAI format
|
||||||
|
choices = chunk.get("choices", [])
|
||||||
|
if choices:
|
||||||
|
delta = choices[0].get("delta", {})
|
||||||
|
token = delta.get("content")
|
||||||
|
finish = choices[0].get("finish_reason")
|
||||||
|
stats = {}
|
||||||
|
if finish == "stop":
|
||||||
|
usage = chunk.get("usage", {})
|
||||||
|
stats["tokens_per_sec"] = usage.get("tokens_per_second", 0.0)
|
||||||
|
return token, finish == "stop", stats
|
||||||
|
# Ollama format fallback
|
||||||
|
if "message" in chunk and "content" in chunk["message"]:
|
||||||
|
token = chunk["message"]["content"]
|
||||||
|
done = chunk.get("done", False)
|
||||||
|
stats = {}
|
||||||
|
if done:
|
||||||
|
eval_count = chunk.get("eval_count", 0)
|
||||||
|
eval_duration = chunk.get("eval_duration", 0)
|
||||||
|
stats["tokens_per_sec"] = (
|
||||||
|
(eval_count / (eval_duration / 1e9)) if eval_duration > 0 else 0
|
||||||
|
)
|
||||||
|
return token, done, stats
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
pass
|
||||||
|
return None, False, {}
|
||||||
|
|
||||||
|
|
||||||
@app.post("/api/chat")
|
@app.post("/api/chat")
|
||||||
async def chat(request: Request):
|
async def chat(request: Request):
|
||||||
body = await read_json_body(request, BODY_LIMIT_CHAT_BYTES)
|
body = await read_json_body(request, BODY_LIMIT_CHAT_BYTES)
|
||||||
@@ -2086,7 +2165,7 @@ async def chat(request: Request):
|
|||||||
"SELECT role, content FROM messages WHERE conversation_id = ? ORDER BY id ASC",
|
"SELECT role, content FROM messages WHERE conversation_id = ? ORDER BY id ASC",
|
||||||
(conv_id,),
|
(conv_id,),
|
||||||
).fetchall()
|
).fetchall()
|
||||||
system_prompt = build_system_prompt(db, preset_prompt, user_message)
|
system_prompt = await build_system_prompt(db, preset_prompt, user_message)
|
||||||
db.close()
|
db.close()
|
||||||
|
|
||||||
messages = []
|
messages = []
|
||||||
@@ -2099,7 +2178,6 @@ async def chat(request: Request):
|
|||||||
"model": model,
|
"model": model,
|
||||||
"messages": messages,
|
"messages": messages,
|
||||||
"stream": True,
|
"stream": True,
|
||||||
"logprobs": True,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async def stream_response():
|
async def stream_response():
|
||||||
@@ -2114,31 +2192,18 @@ async def chat(request: Request):
|
|||||||
try:
|
try:
|
||||||
async with client.stream(
|
async with client.stream(
|
||||||
"POST",
|
"POST",
|
||||||
f"{OLLAMA_BASE}/api/chat",
|
f"{LLAMA_SERVER_BASE}/v1/chat/completions",
|
||||||
json=ollama_payload,
|
json=ollama_payload,
|
||||||
timeout=httpx.Timeout(300.0, connect=10.0),
|
timeout=httpx.Timeout(300.0, connect=10.0),
|
||||||
) as resp:
|
) as resp:
|
||||||
async for line in resp.aiter_lines():
|
async for line in resp.aiter_lines():
|
||||||
if line.strip():
|
if line.strip():
|
||||||
try:
|
token, done, stats = parse_llama_stream_chunk(line)
|
||||||
chunk = json.loads(line)
|
if token:
|
||||||
if "message" in chunk and "content" in chunk["message"]:
|
full_response.append(token)
|
||||||
token = chunk["message"]["content"]
|
yield f"data: {json.dumps({'token': token, 'conversation_id': conv_id})}\n\n"
|
||||||
full_response.append(token)
|
if done:
|
||||||
yield f"data: {json.dumps({'token': token, 'conversation_id': conv_id})}\n\n"
|
tokens_per_sec = stats.get("tokens_per_sec", 0.0)
|
||||||
if "logprobs" in chunk and chunk["logprobs"]:
|
|
||||||
all_logprobs.extend(chunk["logprobs"])
|
|
||||||
if chunk.get("done"):
|
|
||||||
eval_count = chunk.get("eval_count", 0)
|
|
||||||
eval_duration = chunk.get("eval_duration", 0)
|
|
||||||
tokens_per_sec = (
|
|
||||||
(eval_count / (eval_duration / 1e9))
|
|
||||||
if eval_duration > 0
|
|
||||||
else 0
|
|
||||||
)
|
|
||||||
break
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
assistant_msg = "".join(full_response)
|
assistant_msg = "".join(full_response)
|
||||||
perplexity = calculate_perplexity(all_logprobs) if all_logprobs else 0.0
|
perplexity = calculate_perplexity(all_logprobs) if all_logprobs else 0.0
|
||||||
@@ -2176,7 +2241,7 @@ async def chat(request: Request):
|
|||||||
augmented_response = []
|
augmented_response = []
|
||||||
async with client.stream(
|
async with client.stream(
|
||||||
"POST",
|
"POST",
|
||||||
f"{OLLAMA_BASE}/api/chat",
|
f"{LLAMA_SERVER_BASE}/v1/chat/completions",
|
||||||
json={
|
json={
|
||||||
"model": model,
|
"model": model,
|
||||||
"messages": augmented_messages,
|
"messages": augmented_messages,
|
||||||
@@ -2186,19 +2251,11 @@ async def chat(request: Request):
|
|||||||
) as resp2:
|
) as resp2:
|
||||||
async for line in resp2.aiter_lines():
|
async for line in resp2.aiter_lines():
|
||||||
if line.strip():
|
if line.strip():
|
||||||
try:
|
token2, done2, _ = parse_llama_stream_chunk(line)
|
||||||
chunk = json.loads(line)
|
if token2:
|
||||||
if (
|
augmented_response.append(token2)
|
||||||
"message" in chunk
|
if done2:
|
||||||
and "content" in chunk["message"]
|
break
|
||||||
):
|
|
||||||
augmented_response.append(
|
|
||||||
chunk["message"]["content"]
|
|
||||||
)
|
|
||||||
if chunk.get("done"):
|
|
||||||
break
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
raw_response = "".join(augmented_response) or assistant_msg
|
raw_response = "".join(augmented_response) or assistant_msg
|
||||||
cleaned_response = clean_hedging(raw_response)
|
cleaned_response = clean_hedging(raw_response)
|
||||||
@@ -2251,6 +2308,8 @@ async def chat(request: Request):
|
|||||||
|
|
||||||
yield f"data: {json.dumps({'done': True, 'conversation_id': conv_id, 'perplexity': round(perplexity, 2), 'tokens_per_sec': round(tokens_per_sec, 1)})}\n\n"
|
yield f"data: {json.dumps({'done': True, 'conversation_id': conv_id, 'perplexity': round(perplexity, 2), 'tokens_per_sec': round(tokens_per_sec, 1)})}\n\n"
|
||||||
|
|
||||||
|
except httpx.RemoteProtocolError:
|
||||||
|
pass # llama-server closes connection after [DONE] — normal
|
||||||
except httpx.ConnectError:
|
except httpx.ConnectError:
|
||||||
yield f"data: {json.dumps({'error': 'Cannot connect to Ollama. Is it running?'})}\n\n"
|
yield f"data: {json.dumps({'error': 'Cannot connect to Ollama. Is it running?'})}\n\n"
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
2285
app.py.bak
Normal file
2285
app.py.bak
Normal file
File diff suppressed because it is too large
Load Diff
Binary file not shown.
|
Before Width: | Height: | Size: 322 KiB After Width: | Height: | Size: 219 KiB |
@@ -4,7 +4,7 @@ Last updated: 2026-04-27
|
|||||||
Owner: Gramps + Copilot
|
Owner: Gramps + Copilot
|
||||||
Scope: issues, bugs, security exposures, and feature enhancements.
|
Scope: issues, bugs, security exposures, and feature enhancements.
|
||||||
|
|
||||||
Total identified items: 26
|
Total identified items: 27
|
||||||
|
|
||||||
## Priority Definitions
|
## Priority Definitions
|
||||||
- P0: Critical risk or data-loss/security exposure; do first.
|
- P0: Critical risk or data-loss/security exposure; do first.
|
||||||
@@ -60,22 +60,23 @@ Total identified items: 26
|
|||||||
12. Add unit/integration tests for: remember/forget parsing, refusal detection, search fallback, SSE done/error shape.
|
12. Add unit/integration tests for: remember/forget parsing, refusal detection, search fallback, SSE done/error shape.
|
||||||
13. Add conversation title sanitization and length constraints.
|
13. Add conversation title sanitization and length constraints.
|
||||||
14. Ensure default preset semantics are correct (currently all seeded presets are marked default).
|
14. Ensure default preset semantics are correct (currently all seeded presets are marked default).
|
||||||
|
15. Add preflight validation for required model/preset selection and block send with clear user guidance instead of timing out.
|
||||||
|
|
||||||
### P2 Important Features
|
### P2 Important Features
|
||||||
15. Skills system: load markdown skill files with YAML frontmatter from skills directory.
|
16. Skills system: load markdown skill files with YAML frontmatter from skills directory.
|
||||||
16. Skills registry API: list/enable/disable skills and expose active skills to UI.
|
17. Skills registry API: list/enable/disable skills and expose active skills to UI.
|
||||||
17. Inject active skill instructions into system prompt with bounded token budget.
|
18. Inject active skill instructions into system prompt with bounded token budget.
|
||||||
18. Tool execution guardrails: allowlist, confirmation mode, and execution logs.
|
19. Tool execution guardrails: allowlist, confirmation mode, and execution logs.
|
||||||
19. Heartbeat scheduler (cron/systemd timer) for daily check-ins.
|
20. Heartbeat scheduler (cron/systemd timer) for daily check-ins.
|
||||||
20. Heartbeat endpoint for generated briefings and anomaly summaries.
|
21. Heartbeat endpoint for generated briefings and anomaly summaries.
|
||||||
21. Model info UI panel (description, updated date, best-use purpose).
|
22. Model info UI panel (description, updated date, best-use purpose).
|
||||||
22. Default model selection improvements and persistence validation.
|
23. Default model selection improvements and persistence validation.
|
||||||
23. Hidden model list support (exclude models from dropdown).
|
24. Hidden model list support (exclude models from dropdown).
|
||||||
24. Model update action from UI (trigger controlled model pull).
|
25. Model update action from UI (trigger controlled model pull).
|
||||||
|
|
||||||
### P3 Nice to Have
|
### P3 Nice to Have
|
||||||
25. Conversation search/filter and export tooling.
|
26. Conversation search/filter and export tooling.
|
||||||
26. Keyboard shortcuts, retry button, and source-link polish.
|
27. Keyboard shortcuts, retry button, and source-link polish.
|
||||||
|
|
||||||
## Maintenance Rules
|
## Maintenance Rules
|
||||||
- Keep this file as the single source of truth.
|
- Keep this file as the single source of truth.
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
# ⚡ JarvisChat v1.7.6
|
# ⚡ JarvisChat v1.7.8
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
@@ -74,7 +74,7 @@ Canonical backlog: [docs/wiki/current-wip.md](docs/wiki/current-wip.md)
|
|||||||
|
|
||||||
Scope boundary: local-first (same-host Ollama), optional RFC1918 LAN endpoints, no public Internet AI endpoints by default.
|
Scope boundary: local-first (same-host Ollama), optional RFC1918 LAN endpoints, no public Internet AI endpoints by default.
|
||||||
|
|
||||||
Total identified items: 26
|
Total identified items: 27
|
||||||
|
|
||||||
Top 10 (brief):
|
Top 10 (brief):
|
||||||
|
|
||||||
@@ -113,6 +113,7 @@ Implementation status: complete (guest session by default + admin unlock + admin
|
|||||||
16. Hide/remove model from list — exclude models from dropdown
|
16. Hide/remove model from list — exclude models from dropdown
|
||||||
17. Update model function — trigger `ollama pull` for selected model from UI
|
17. Update model function — trigger `ollama pull` for selected model from UI
|
||||||
18. Add mouseover tooltip to SEND button
|
18. Add mouseover tooltip to SEND button
|
||||||
|
19. Add preflight validation for required model/preset selection and show a clear warning before send to prevent avoidable timeout loops
|
||||||
|
|
||||||
## File Structure
|
## File Structure
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user