about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
authorFuwn <[email protected]>2025-08-06 21:38:37 +0200
committerFuwn <[email protected]>2025-08-06 21:38:43 +0200
commitf4b2cf41909c5c5ed87e3a206ada000b7560467f (patch)
tree98166e734a80044a42d00cd35497cabd758f70a6
parentfeat(umapyai): Match web UI source display behaviour in CLI (diff)
downloadumapyai-f4b2cf41909c5c5ed87e3a206ada000b7560467f.tar.xz
umapyai-f4b2cf41909c5c5ed87e3a206ada000b7560467f.zip
feat(umapyai): Switch to ollama library calls
-rw-r--r--  pyproject.toml                                                      2
-rw-r--r--  requirements-dev.lock                                               6
-rw-r--r--  requirements.lock                                                   6
-rw-r--r--  src/umapyai/__init__.py                                            37
-rw-r--r--  src/umapyai/constants.py                                            1
-rw-r--r--  src/umapyai/ollama_server.py (renamed from src/umapyai/ollama.py)  13
6 files changed, 29 insertions, 36 deletions
diff --git a/pyproject.toml b/pyproject.toml
index d378722..d14954d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,6 +14,8 @@ dependencies = [
"flask-cors>=6.0.1",
"flask-sock>=0.7.0",
"spacy>=3.8.7",
+ "ollama>=0.5.2",
+ "en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl",
]
readme = "README.md"
requires-python = ">= 3.8"
diff --git a/requirements-dev.lock b/requirements-dev.lock
index 6781bb3..a7de775 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -64,6 +64,8 @@ distro==1.9.0
# via posthog
durationpy==0.10
# via kubernetes
+en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
+ # via umapyai
filelock==3.18.0
# via huggingface-hub
# via torch
@@ -100,6 +102,7 @@ httptools==0.6.4
# via uvicorn
httpx==0.28.1
# via chromadb
+ # via ollama
huggingface-hub==0.34.1
# via sentence-transformers
# via tokenizers
@@ -166,6 +169,8 @@ numpy==2.3.2
oauthlib==3.3.1
# via kubernetes
# via requests-oauthlib
+ollama==0.5.2
+ # via umapyai
onnxruntime==1.22.1
# via chromadb
opentelemetry-api==1.35.0
@@ -222,6 +227,7 @@ pybase64==1.4.2
pydantic==2.11.7
# via chromadb
# via confection
+ # via ollama
# via spacy
# via thinc
# via weasel
diff --git a/requirements.lock b/requirements.lock
index bd090a9..f0d2f08 100644
--- a/requirements.lock
+++ b/requirements.lock
@@ -64,6 +64,8 @@ distro==1.9.0
# via posthog
durationpy==0.10
# via kubernetes
+en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
+ # via umapyai
filelock==3.18.0
# via huggingface-hub
# via torch
@@ -100,6 +102,7 @@ httptools==0.6.4
# via uvicorn
httpx==0.28.1
# via chromadb
+ # via ollama
huggingface-hub==0.34.1
# via sentence-transformers
# via tokenizers
@@ -166,6 +169,8 @@ numpy==2.3.2
oauthlib==3.3.1
# via kubernetes
# via requests-oauthlib
+ollama==0.5.2
+ # via umapyai
onnxruntime==1.22.1
# via chromadb
opentelemetry-api==1.35.0
@@ -220,6 +225,7 @@ pybase64==1.4.2
pydantic==2.11.7
# via chromadb
# via confection
+ # via ollama
# via spacy
# via thinc
# via weasel
diff --git a/src/umapyai/__init__.py b/src/umapyai/__init__.py
index e8abda6..b157516 100644
--- a/src/umapyai/__init__.py
+++ b/src/umapyai/__init__.py
@@ -3,16 +3,15 @@ import sys
import json
import chromadb
from sentence_transformers import SentenceTransformer
-import requests
+import ollama
from loguru import logger
from threading import Thread
from flask import Flask, send_file
from flask_cors import CORS
from flask_sock import Sock
from .constants import (ARTICLES_DIRECTORY, CHROMA_DIRECTORY, CHROMA_COLLECTION,
- CHUNK_SIZE, EMBEDDING_MODEL, OLLAMA_MODEL, TOP_K,
- OLLAMA_URL)
-from .ollama import start_ollama_server, is_ollama_live, ensure_model_pulled, kill_ollama
+ CHUNK_SIZE, EMBEDDING_MODEL, OLLAMA_MODEL, TOP_K)
+from .ollama_server import start_ollama_server, is_ollama_live, ensure_model_pulled, kill_ollama
from collections import defaultdict
from .language import clean_for_match, get_query_phrases
@@ -185,32 +184,14 @@ def main():
return merged[:top_k]
def query_ollama(prompt, context=None):
- url = f"{OLLAMA_URL}/api/generate"
- payload = {
- "model": OLLAMA_MODEL,
- "prompt": prompt,
- "stream": True,
- }
-
- if context:
- payload["context"] = context
-
try:
- response = requests.post(url, json=payload, stream=True)
-
- response.raise_for_status()
-
- for line in response.iter_lines():
- if line:
- json_response = json.loads(line)
+ for chunk in ollama.generate(
+ model=OLLAMA_MODEL, prompt=prompt, stream=True, context=context):
+ if not chunk.get("done"):
+ yield {"type": "answer_chunk", "data": chunk.get("response", "")}
+ else:
+ yield {"type": "history", "data": chunk.get("context")}
- if not json_response.get("done"):
- yield {
- "type": "answer_chunk",
- "data": json_response.get("response", "")
- }
- else:
- yield {"type": "history", "data": json_response.get("context")}
except Exception as error:
error_message = f"Error communicating with Ollama: {error}"
diff --git a/src/umapyai/constants.py b/src/umapyai/constants.py
index 207ef8b..01601a3 100644
--- a/src/umapyai/constants.py
+++ b/src/umapyai/constants.py
@@ -5,4 +5,3 @@ CHUNK_SIZE = 350 # words
EMBEDDING_MODEL = "all-MiniLM-L6-v2"
OLLAMA_MODEL = "qwen3:14b"
TOP_K = 4
-OLLAMA_URL = "http://localhost:11434"
diff --git a/src/umapyai/ollama.py b/src/umapyai/ollama_server.py
index 73329be..9121880 100644
--- a/src/umapyai/ollama.py
+++ b/src/umapyai/ollama_server.py
@@ -1,18 +1,17 @@
-import requests
import time
import subprocess
import psutil
-from .constants import OLLAMA_URL
import os
from loguru import logger
import sys
+import ollama
def is_ollama_live():
try:
- response = requests.get(f"{OLLAMA_URL}/api/tags", timeout=2)
+ ollama.list()
- return response.status_code == 200
+ return True
except Exception:
return False
@@ -57,13 +56,13 @@ def kill_ollama(process):
def ensure_model_pulled(model):
try:
- tags = requests.get(f"{OLLAMA_URL}/api/tags").json().get("models", [])
+ tags = ollama.list().get("models", [])
if not any(model in m.get("name", "") for m in tags):
logger.info(f"Pulling model '{model}' ...")
- subprocess.run(["ollama", "pull", model], check=True)
+ ollama.pull(model)
else:
logger.success(f"Model '{model}' already pulled.")
except Exception as e:
logger.warning(f"Couldn't check/pull Ollama model: {e}")
- logger.warning("Proceeding anyway ...")
+ logger.warning("Proceeding anyway ...") \ No newline at end of file