From f4b2cf41909c5c5ed87e3a206ada000b7560467f Mon Sep 17 00:00:00 2001
From: Fuwn <contact@fuwn.me>
Date: Wed, 6 Aug 2025 21:38:37 +0200
Subject: feat(umapyai): Switch to ollama library calls

---
 pyproject.toml               |  2 ++
 requirements-dev.lock        |  6 ++++
 requirements.lock            |  6 ++++
 src/umapyai/__init__.py      | 37 ++++++------------------
 src/umapyai/constants.py     |  1 -
 src/umapyai/ollama.py        | 69 --------------------------------------------
 src/umapyai/ollama_server.py | 68 +++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 91 insertions(+), 98 deletions(-)
 delete mode 100644 src/umapyai/ollama.py
 create mode 100644 src/umapyai/ollama_server.py

diff --git a/pyproject.toml b/pyproject.toml
index d378722..d14954d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,6 +14,8 @@ dependencies = [
     "flask-cors>=6.0.1",
     "flask-sock>=0.7.0",
     "spacy>=3.8.7",
+    "ollama>=0.5.2",
+    "en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl",
 ]
 readme = "README.md"
 requires-python = ">= 3.8"
diff --git a/requirements-dev.lock b/requirements-dev.lock
index 6781bb3..a7de775 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -64,6 +64,8 @@ distro==1.9.0
     # via posthog
 durationpy==0.10
     # via kubernetes
+en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
+    # via umapyai
 filelock==3.18.0
     # via huggingface-hub
     # via torch
@@ -100,6 +102,7 @@ httptools==0.6.4
     # via uvicorn
 httpx==0.28.1
     # via chromadb
+    # via ollama
 huggingface-hub==0.34.1
     # via sentence-transformers
     # via tokenizers
@@ -166,6 +169,8 @@ numpy==2.3.2
 oauthlib==3.3.1
     # via kubernetes
     # via requests-oauthlib
+ollama==0.5.2
+    # via umapyai
 onnxruntime==1.22.1
     # via chromadb
 opentelemetry-api==1.35.0
@@ -222,6 +227,7 @@ pybase64==1.4.2
 pydantic==2.11.7
     # via chromadb
     # via confection
+    # via ollama
     # via spacy
     # via thinc
     # via weasel
diff --git a/requirements.lock b/requirements.lock
index bd090a9..f0d2f08 100644
--- a/requirements.lock
+++ b/requirements.lock
@@ -64,6 +64,8 @@ distro==1.9.0
     # via posthog
 durationpy==0.10
     # via kubernetes
+en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
+    # via umapyai
 filelock==3.18.0
     # via huggingface-hub
     # via torch
@@ -100,6 +102,7 @@ httptools==0.6.4
     # via uvicorn
 httpx==0.28.1
     # via chromadb
+    # via ollama
 huggingface-hub==0.34.1
     # via sentence-transformers
     # via tokenizers
@@ -166,6 +169,8 @@ numpy==2.3.2
 oauthlib==3.3.1
     # via kubernetes
     # via requests-oauthlib
+ollama==0.5.2
+    # via umapyai
 onnxruntime==1.22.1
     # via chromadb
 opentelemetry-api==1.35.0
@@ -220,6 +225,7 @@ pybase64==1.4.2
 pydantic==2.11.7
     # via chromadb
     # via confection
+    # via ollama
     # via spacy
     # via thinc
     # via weasel
diff --git a/src/umapyai/__init__.py b/src/umapyai/__init__.py
index e8abda6..b157516 100644
--- a/src/umapyai/__init__.py
+++ b/src/umapyai/__init__.py
@@ -3,16 +3,15 @@ import sys
 import json
 import chromadb
 from sentence_transformers import SentenceTransformer
-import requests
+import ollama
 from loguru import logger
 from threading import Thread
 from flask import Flask, send_file
 from flask_cors import CORS
 from flask_sock import Sock
 from .constants import (ARTICLES_DIRECTORY, CHROMA_DIRECTORY, CHROMA_COLLECTION,
-                        CHUNK_SIZE, EMBEDDING_MODEL, OLLAMA_MODEL, TOP_K,
-                        OLLAMA_URL)
-from .ollama import start_ollama_server, is_ollama_live, ensure_model_pulled, kill_ollama
+                        CHUNK_SIZE, EMBEDDING_MODEL, OLLAMA_MODEL, TOP_K)
+from .ollama_server import start_ollama_server, is_ollama_live, ensure_model_pulled, kill_ollama
 from collections import defaultdict
 from .language import clean_for_match, get_query_phrases
 
@@ -185,32 +184,14 @@ def main():
       return merged[:top_k]
 
     def query_ollama(prompt, context=None):
-      url = f"{OLLAMA_URL}/api/generate"
-      payload = {
-          "model": OLLAMA_MODEL,
-          "prompt": prompt,
-          "stream": True,
-      }
-
-      if context:
-        payload["context"] = context
-
       try:
-        response = requests.post(url, json=payload, stream=True)
-
-        response.raise_for_status()
-
-        for line in response.iter_lines():
-          if line:
-            json_response = json.loads(line)
+        for chunk in ollama.generate(
+            model=OLLAMA_MODEL, prompt=prompt, stream=True, context=context):
+          if not chunk.get("done"):
+            yield {"type": "answer_chunk", "data": chunk.get("response", "")}
+          else:
+            yield {"type": "history", "data": chunk.get("context")}
 
-            if not json_response.get("done"):
-              yield {
-                  "type": "answer_chunk",
-                  "data": json_response.get("response", "")
-              }
-            else:
-              yield {"type": "history", "data": json_response.get("context")}
       except Exception as error:
         error_message = f"Error communicating with Ollama: {error}"
 
diff --git a/src/umapyai/constants.py b/src/umapyai/constants.py
index 207ef8b..01601a3 100644
--- a/src/umapyai/constants.py
+++ b/src/umapyai/constants.py
@@ -5,4 +5,3 @@ CHUNK_SIZE = 350  # words
 EMBEDDING_MODEL = "all-MiniLM-L6-v2"
 OLLAMA_MODEL = "qwen3:14b"
 TOP_K = 4
-OLLAMA_URL = "http://localhost:11434"
diff --git a/src/umapyai/ollama.py b/src/umapyai/ollama.py
deleted file mode 100644
index 73329be..0000000
--- a/src/umapyai/ollama.py
+++ /dev/null
@@ -1,69 +0,0 @@
-import requests
-import time
-import subprocess
-import psutil
-from .constants import OLLAMA_URL
-import os
-from loguru import logger
-import sys
-
-
-def is_ollama_live():
-  try:
-    response = requests.get(f"{OLLAMA_URL}/api/tags", timeout=2)
-
-    return response.status_code == 200
-  except Exception:
-    return False
-
-
-def start_ollama_server():
-  logger.info("Starting Ollama server with OLLAMA_ORIGINS='*' ...")
-
-  environment = os.environ.copy()
-  environment["OLLAMA_ORIGINS"] = "*"
-  process = subprocess.Popen(["ollama", "serve"],
-                             env=environment,
-                             stdout=subprocess.PIPE,
-                             stderr=subprocess.STDOUT,
-                             text=True)
-
-  for _ in range(30):
-    if is_ollama_live():
-      logger.success("Ollama is now live.")
-
-      return process
-
-    time.sleep(1)
-
-  logger.error("Ollama server did not start after 30 seconds.")
-  process.terminate()
-  sys.exit(1)
-
-
-def kill_ollama(process):
-  logger.info("Killing Ollama ...")
-
-  try:
-    parent_process = psutil.Process(process.pid)
-
-    for child_process in parent_process.children(recursive=True):
-      child_process.terminate()
-
-    parent_process.terminate()
-  except Exception as error:
-    logger.error(f"Error killing Ollama: {error}")
-
-
-def ensure_model_pulled(model):
-  try:
-    tags = requests.get(f"{OLLAMA_URL}/api/tags").json().get("models", [])
-
-    if not any(model in m.get("name", "") for m in tags):
-      logger.info(f"Pulling model '{model}' ...")
-      subprocess.run(["ollama", "pull", model], check=True)
-    else:
-      logger.success(f"Model '{model}' already pulled.")
-  except Exception as e:
-    logger.warning(f"Couldn't check/pull Ollama model: {e}")
-    logger.warning("Proceeding anyway ...")
diff --git a/src/umapyai/ollama_server.py b/src/umapyai/ollama_server.py
new file mode 100644
index 0000000..9121880
--- /dev/null
+++ b/src/umapyai/ollama_server.py
@@ -0,0 +1,68 @@
+import time
+import subprocess
+import psutil
+import os
+from loguru import logger
+import sys
+import ollama
+
+
+def is_ollama_live():
+  """Report whether `ollama.list()` completes (True) or raises (False)."""
+  try:
+    ollama.list()
+  except Exception:
+    return False
+  return True
+
+
+def start_ollama_server():
+  """Launch `ollama serve` and hand back its Popen once the server answers.
+
+  Polls `is_ollama_live()` up to 30 times, sleeping 1 second after each miss;
+  if no poll succeeds, terminates the child and exits the interpreter with 1.
+  """
+  logger.info("Starting Ollama server with OLLAMA_ORIGINS='*' ...")
+  server = subprocess.Popen(
+      ["ollama", "serve"],
+      env={**os.environ, "OLLAMA_ORIGINS": "*"},
+      stdout=subprocess.PIPE,
+      stderr=subprocess.STDOUT,  # merged into the stdout pipe
+      text=True)
+  for _attempt in range(30):
+    if is_ollama_live():
+      logger.success("Ollama is now live.")
+      return server
+    time.sleep(1)
+
+  logger.error("Ollama server did not start after 30 seconds.")
+  server.terminate()
+  sys.exit(1)
+
+
+def kill_ollama(process):
+  """Terminate every descendant of `process`, then `process` itself.
+
+  Any exception raised along the way is logged rather than propagated.
+  """
+  logger.info("Killing Ollama ...")
+  try:
+    root = psutil.Process(process.pid)
+    for target in [*root.children(recursive=True), root]:
+      target.terminate()
+  except Exception as failure:
+    logger.error(f"Error killing Ollama: {failure}")
+
+
+def ensure_model_pulled(model):
+  """Pull `model` if no listed tag contains it; failures are only logged."""
+  try:
+    # Typed list entries (ollama>=0.4) expose the tag as `model`, not `name`.
+    if not any(model in (m.get("model") or "") for m in ollama.list().get("models", [])):
+      logger.info(f"Pulling model '{model}' ...")
+      ollama.pull(model)
+    else:
+      logger.success(f"Model '{model}' already pulled.")
+  except Exception as e:
+    logger.warning(f"Couldn't check/pull Ollama model: {e}")
+    logger.warning("Proceeding anyway ...")
\ No newline at end of file
-- 
cgit v1.2.3