about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
authorFuwn <[email protected]>2025-08-06 21:38:37 +0200
committerFuwn <[email protected]>2025-08-06 21:38:43 +0200
commitf4b2cf41909c5c5ed87e3a206ada000b7560467f (patch)
tree98166e734a80044a42d00cd35497cabd758f70a6
parentfeat(umapyai): Match web UI source display behaviour in CLI (diff)
downloadumapyai-f4b2cf41909c5c5ed87e3a206ada000b7560467f.tar.xz
umapyai-f4b2cf41909c5c5ed87e3a206ada000b7560467f.zip
feat(umapyai): Switch to ollama library calls
-rw-r--r--  pyproject.toml                                                      2
-rw-r--r--  requirements-dev.lock                                               6
-rw-r--r--  requirements.lock                                                   6
-rw-r--r--  src/umapyai/__init__.py                                            37
-rw-r--r--  src/umapyai/constants.py                                            1
-rw-r--r--  src/umapyai/ollama_server.py (renamed from src/umapyai/ollama.py)  13
6 files changed, 29 insertions, 36 deletions
diff --git a/pyproject.toml b/pyproject.toml
index d378722..d14954d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,6 +14,8 @@ dependencies = [
"flask-cors>=6.0.1",
"flask-sock>=0.7.0",
"spacy>=3.8.7",
+ "ollama>=0.5.2",
+ "en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl",
]
readme = "README.md"
requires-python = ">= 3.8"
diff --git a/requirements-dev.lock b/requirements-dev.lock
index 6781bb3..a7de775 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -64,6 +64,8 @@ distro==1.9.0
# via posthog
durationpy==0.10
# via kubernetes
+en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
+ # via umapyai
filelock==3.18.0
# via huggingface-hub
# via torch
@@ -100,6 +102,7 @@ httptools==0.6.4
# via uvicorn
httpx==0.28.1
# via chromadb
+ # via ollama
huggingface-hub==0.34.1
# via sentence-transformers
# via tokenizers
@@ -166,6 +169,8 @@ numpy==2.3.2
oauthlib==3.3.1
# via kubernetes
# via requests-oauthlib
+ollama==0.5.2
+ # via umapyai
onnxruntime==1.22.1
# via chromadb
opentelemetry-api==1.35.0
@@ -222,6 +227,7 @@ pybase64==1.4.2
pydantic==2.11.7
# via chromadb
# via confection
+ # via ollama
# via spacy
# via thinc
# via weasel
diff --git a/requirements.lock b/requirements.lock
index bd090a9..f0d2f08 100644
--- a/requirements.lock
+++ b/requirements.lock
@@ -64,6 +64,8 @@ distro==1.9.0
# via posthog
durationpy==0.10
# via kubernetes
+en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
+ # via umapyai
filelock==3.18.0
# via huggingface-hub
# via torch
@@ -100,6 +102,7 @@ httptools==0.6.4
# via uvicorn
httpx==0.28.1
# via chromadb
+ # via ollama
huggingface-hub==0.34.1
# via sentence-transformers
# via tokenizers
@@ -166,6 +169,8 @@ numpy==2.3.2
oauthlib==3.3.1
# via kubernetes
# via requests-oauthlib
+ollama==0.5.2
+ # via umapyai
onnxruntime==1.22.1
# via chromadb
opentelemetry-api==1.35.0
@@ -220,6 +225,7 @@ pybase64==1.4.2
pydantic==2.11.7
# via chromadb
# via confection
+ # via ollama
# via spacy
# via thinc
# via weasel
diff --git a/src/umapyai/__init__.py b/src/umapyai/__init__.py
index e8abda6..b157516 100644
--- a/src/umapyai/__init__.py
+++ b/src/umapyai/__init__.py
@@ -3,16 +3,15 @@ import sys
import json
import chromadb
from sentence_transformers import SentenceTransformer
-import requests
+import ollama
from loguru import logger
from threading import Thread
from flask import Flask, send_file
from flask_cors import CORS
from flask_sock import Sock
from .constants import (ARTICLES_DIRECTORY, CHROMA_DIRECTORY, CHROMA_COLLECTION,
- CHUNK_SIZE, EMBEDDING_MODEL, OLLAMA_MODEL, TOP_K,
- OLLAMA_URL)
-from .ollama import start_ollama_server, is_ollama_live, ensure_model_pulled, kill_ollama
+ CHUNK_SIZE, EMBEDDING_MODEL, OLLAMA_MODEL, TOP_K)
+from .ollama_server import start_ollama_server, is_ollama_live, ensure_model_pulled, kill_ollama
from collections import defaultdict
from .language import clean_for_match, get_query_phrases
@@ -185,32 +184,14 @@ def main():
return merged[:top_k]
def query_ollama(prompt, context=None):
- url = f"{OLLAMA_URL}/api/generate"
- payload = {
- "model": OLLAMA_MODEL,
- "prompt": prompt,
- "stream": True,
- }
-
- if context:
- payload["context"] = context
-
try:
- response = requests.post(url, json=payload, stream=True)
-
- response.raise_for_status()
-
- for line in response.iter_lines():
- if line:
- json_response = json.loads(line)
+ for chunk in ollama.generate(
+ model=OLLAMA_MODEL, prompt=prompt, stream=True, context=context):
+ if not chunk.get("done"):
+ yield {"type": "answer_chunk", "data": chunk.get("response", "")}
+ else:
+ yield {"type": "history", "data": chunk.get("context")}
- if not json_response.get("done"):
- yield {
- "type": "answer_chunk",
- "data": json_response.get("response", "")
- }
- else:
- yield {"type": "history", "data": json_response.get("context")}
except Exception as error:
error_message = f"Error communicating with Ollama: {error}"
diff --git a/src/umapyai/constants.py b/src/umapyai/constants.py
index 207ef8b..01601a3 100644
--- a/src/umapyai/constants.py
+++ b/src/umapyai/constants.py
@@ -5,4 +5,3 @@ CHUNK_SIZE = 350 # words
EMBEDDING_MODEL = "all-MiniLM-L6-v2"
OLLAMA_MODEL = "qwen3:14b"
TOP_K = 4
-OLLAMA_URL = "http://localhost:11434"
diff --git a/src/umapyai/ollama.py b/src/umapyai/ollama_server.py
index 73329be..9121880 100644
--- a/src/umapyai/ollama.py
+++ b/src/umapyai/ollama_server.py
@@ -1,18 +1,17 @@
-import requests
import time
import subprocess
import psutil
-from .constants import OLLAMA_URL
import os
from loguru import logger
import sys
+import ollama
def is_ollama_live():
try:
- response = requests.get(f"{OLLAMA_URL}/api/tags", timeout=2)
+ ollama.list()
- return response.status_code == 200
+ return True
except Exception:
return False
@@ -57,13 +56,13 @@ def kill_ollama(process):
def ensure_model_pulled(model):
try:
- tags = requests.get(f"{OLLAMA_URL}/api/tags").json().get("models", [])
+ tags = ollama.list().get("models", [])
if not any(model in m.get("name", "") for m in tags):
logger.info(f"Pulling model '{model}' ...")
- subprocess.run(["ollama", "pull", model], check=True)
+ ollama.pull(model)
else:
logger.success(f"Model '{model}' already pulled.")
except Exception as e:
logger.warning(f"Couldn't check/pull Ollama model: {e}")
- logger.warning("Proceeding anyway ...")
+ logger.warning("Proceeding anyway ...") \ No newline at end of file