refactor(arch): modular package structure — split monolithic app.py into config/db/auth/memory/search/rag/gpu + routers/
- config.py: all constants, env vars, limits, skill registry, profiles
- db.py: schema init, connection factory, skill state helpers
- security.py: PIN hashing, audit logging, rate limiting, CSRF, request helpers
- auth.py: session management, PIN verify, auth routes
- memory.py: FTS5 CRUD + remember/forget command processing
- search.py: SearXNG integration, perplexity scoring, refusal/hedge detection
- gpu.py: rocm-smi stats
- rag.py: Qdrant vector search + system prompt assembly
- routers/: conversations, memories, models, presets, profile, settings, skills, chat, search
- app.py: slim entry point, middleware, router registration only

Bumps to v1.9.0
This commit is contained in:
78
routers/models.py
Normal file
78
routers/models.py
Normal file
@@ -0,0 +1,78 @@
|
||||
"""
|
||||
JarvisChat routers - Model listing and inspection, system stats, search status.
|
||||
"""
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
import httpx
|
||||
import psutil
|
||||
from fastapi import APIRouter, HTTPException, Request
|
||||
|
||||
from config import OLLAMA_BASE
|
||||
from gpu import get_gpu_stats
|
||||
from security import read_json_body, BODY_LIMIT_DEFAULT_BYTES
|
||||
|
||||
# Module logger, obtained under the "jarvischat" logger name.
log = logging.getLogger("jarvischat")
|
||||
# Router that collects the endpoints declared in this module.
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("/api/models")
async def list_models():
    """List the models advertised by the backend's ``/v1/models`` endpoint.

    Returns:
        ``{"models": [{"name": <id>, "model": <id>}, ...]}`` with one entry
        per item of the upstream ``data`` array (empty when that key is
        missing).

    Raises:
        HTTPException: 502 when the backend cannot be reached, 504 when it
            does not answer within the 10-second timeout.
    """
    async with httpx.AsyncClient() as client:
        # Keep the try body to the network call so only transport failures
        # are translated into gateway errors.
        try:
            resp = await client.get(f"{OLLAMA_BASE}/v1/models", timeout=10)
        except httpx.ConnectError as exc:
            raise HTTPException(
                status_code=502, detail="Cannot connect to llama-server."
            ) from exc
        except httpx.TimeoutException as exc:
            # Timeouts are not ConnectError subclasses; without this branch a
            # slow backend would surface as an unhandled 500.
            raise HTTPException(
                status_code=504, detail="llama-server did not respond in time."
            ) from exc

        data = resp.json()
        # The same id is exposed under both keys.
        models = [{"name": m["id"], "model": m["id"]} for m in data.get("data", [])]
        return {"models": models}
|
||||
|
||||
|
||||
@router.get("/api/ps")
async def running_models():
    """Proxy the backend's ``/api/ps`` endpoint.

    Returns:
        The decoded JSON body of the upstream response, passed through
        unchanged.

    Raises:
        HTTPException: 502 when the backend cannot be reached, 504 when it
            does not answer within the 10-second timeout.
    """
    async with httpx.AsyncClient() as client:
        try:
            resp = await client.get(f"{OLLAMA_BASE}/api/ps", timeout=10)
        except httpx.ConnectError as exc:
            raise HTTPException(
                status_code=502, detail="Cannot connect to Ollama."
            ) from exc
        except httpx.TimeoutException as exc:
            # Timeouts are not ConnectError subclasses; without this branch a
            # slow backend would surface as an unhandled 500.
            raise HTTPException(
                status_code=504, detail="Ollama did not respond in time."
            ) from exc

        return resp.json()
|
||||
|
||||
|
||||
@router.post("/api/show")
async def show_model(request: Request):
    """Forward a JSON request body to the backend's ``/api/show`` endpoint.

    The body is read through ``read_json_body`` with the default size limit
    and posted upstream as JSON.

    Args:
        request: Incoming request whose JSON body is relayed.

    Returns:
        The decoded JSON body of the upstream response, passed through
        unchanged.

    Raises:
        HTTPException: 502 when the backend cannot be reached, 504 when it
            does not answer within the 10-second timeout.
    """
    # BODY_LIMIT_DEFAULT_BYTES is already imported at module level, so no
    # function-scope import is needed here.
    body = await read_json_body(request, BODY_LIMIT_DEFAULT_BYTES)
    async with httpx.AsyncClient() as client:
        try:
            resp = await client.post(f"{OLLAMA_BASE}/api/show", json=body, timeout=10)
        except httpx.ConnectError as exc:
            raise HTTPException(
                status_code=502, detail="Cannot connect to Ollama."
            ) from exc
        except httpx.TimeoutException as exc:
            # Timeouts are not ConnectError subclasses; without this branch a
            # slow backend would surface as an unhandled 500.
            raise HTTPException(
                status_code=504, detail="Ollama did not respond in time."
            ) from exc

        return resp.json()
|
||||
|
||||
|
||||
@router.get("/api/stats")
async def system_stats():
    """Report CPU, memory and GPU utilisation.

    Returns:
        A dict with ``cpu_percent``, ``memory_percent``, ``memory_used_gb``
        and ``memory_total_gb`` (each rounded to one decimal place), plus
        ``gpu_percent``, ``vram_percent`` and ``gpu_available`` copied from
        ``get_gpu_stats()``.
    """
    import asyncio

    # cpu_percent(interval=0.1) blocks for the sampling window; run it in the
    # default executor so other requests keep being served meanwhile.
    loop = asyncio.get_running_loop()
    cpu_percent = await loop.run_in_executor(
        None, lambda: psutil.cpu_percent(interval=0.1)
    )
    memory = psutil.virtual_memory()
    # NOTE(review): get_gpu_stats() is called inline; if it shells out it may
    # also block the event loop - confirm against gpu.py.
    gpu = get_gpu_stats()

    bytes_per_gib = 1024**3
    return {
        "cpu_percent": round(cpu_percent, 1),
        "memory_percent": round(memory.percent, 1),
        "memory_used_gb": round(memory.used / bytes_per_gib, 1),
        "memory_total_gb": round(memory.total / bytes_per_gib, 1),
        "gpu_percent": gpu["gpu_percent"],
        "vram_percent": gpu["vram_percent"],
        "gpu_available": gpu["available"],
    }
|
||||
|
||||
|
||||
@router.get("/api/search/status")
async def search_status():
    """Probe the SearXNG instance and report whether it answers.

    Sends a throwaway ``q=test`` JSON search with a 5-second timeout.

    Returns:
        ``{"available": True}`` when the probe returns HTTP 200, otherwise
        ``{"available": False}``; any exception during the probe also counts
        as unavailable.
    """
    from config import SEARXNG_BASE

    async with httpx.AsyncClient() as client:
        try:
            reply = await client.get(
                f"{SEARXNG_BASE}/search",
                params={"q": "test", "format": "json"},
                timeout=5,
            )
            reachable = reply.status_code == 200
        except Exception:
            # Best-effort health check: every failure means "not available".
            reachable = False
    return {"available": reachable}
|
||||
Reference in New Issue
Block a user