diff --git a/app.py b/app.py
index ecd6039..f5802ae 100644
--- a/app.py
+++ b/app.py
@@ -58,6 +58,7 @@ log.addHandler(syslog_handler)
 # --- Configuration ---
 VERSION = "v1.8.0"
 OLLAMA_BASE = os.environ.get("OLLAMA_BASE", "http://localhost:11434")
+LLAMA_SERVER_BASE = os.environ.get("LLAMA_SERVER_BASE", "http://192.168.50.108:8081")  # base URL for the llama-server /v1/chat/completions calls; default is a private-LAN address, override via the LLAMA_SERVER_BASE env var
 SEARXNG_BASE = "http://localhost:8888"
 BASE_DIR = Path(__file__).parent
 DB_PATH = BASE_DIR / "jarvischat.db"
@@ -1038,7 +1039,7 @@ def get_gpu_stats() -> dict:
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     log.info(f"JarvisChat v{VERSION} starting up")
-    log.info(f"Ollama: {OLLAMA_BASE}, SearXNG: {SEARXNG_BASE}")
+    log.info(f"Ollama: {OLLAMA_BASE}, llama-server: {LLAMA_SERVER_BASE}, SearXNG: {SEARXNG_BASE}")
     init_db()
     log.info(f"Memory system: {get_memory_count()} memories loaded")
     yield
@@ -1966,7 +1967,7 @@ async def explicit_search(request: Request):
             try:
                 async with client.stream(
                     "POST",
-                    f"{OLLAMA_BASE}/api/chat",
+                    f"{LLAMA_SERVER_BASE}/v1/chat/completions",
                     json={"model": model, "messages": messages, "stream": True},
                     timeout=httpx.Timeout(300.0, connect=10.0),
                 ) as resp:
@@ -2191,7 +2192,7 @@ async def chat(request: Request):
             try:
                 async with client.stream(
                     "POST",
-                    f"{OLLAMA_BASE}/api/chat",
+                    f"{LLAMA_SERVER_BASE}/v1/chat/completions",
                     json=ollama_payload,
                     timeout=httpx.Timeout(300.0, connect=10.0),
                 ) as resp:
@@ -2240,7 +2241,7 @@ async def chat(request: Request):
                         augmented_response = []
                         async with client.stream(
                             "POST",
-                            f"{OLLAMA_BASE}/api/chat",
+                            f"{LLAMA_SERVER_BASE}/v1/chat/completions",
                             json={
                                 "model": model,
                                 "messages": augmented_messages,