diff options
| author | Fuwn <[email protected]> | 2025-08-06 21:38:37 +0200 |
|---|---|---|
| committer | Fuwn <[email protected]> | 2025-08-06 21:38:43 +0200 |
| commit | f4b2cf41909c5c5ed87e3a206ada000b7560467f (patch) | |
| tree | 98166e734a80044a42d00cd35497cabd758f70a6 | |
| parent | feat(umapyai): Match web UI source display behaviour in CLI (diff) | |
| download | umapyai-f4b2cf41909c5c5ed87e3a206ada000b7560467f.tar.xz umapyai-f4b2cf41909c5c5ed87e3a206ada000b7560467f.zip | |
feat(umapyai): Switch to ollama library calls
| -rw-r--r-- | pyproject.toml | 2 | ||||
| -rw-r--r-- | requirements-dev.lock | 6 | ||||
| -rw-r--r-- | requirements.lock | 6 | ||||
| -rw-r--r-- | src/umapyai/__init__.py | 37 | ||||
| -rw-r--r-- | src/umapyai/constants.py | 1 | ||||
| -rw-r--r-- | src/umapyai/ollama_server.py (renamed from src/umapyai/ollama.py) | 13 |
6 files changed, 29 insertions, 36 deletions
diff --git a/pyproject.toml b/pyproject.toml index d378722..d14954d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,8 @@ dependencies = [ "flask-cors>=6.0.1", "flask-sock>=0.7.0", "spacy>=3.8.7", + "ollama>=0.5.2", + "en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl", ] readme = "README.md" requires-python = ">= 3.8" diff --git a/requirements-dev.lock b/requirements-dev.lock index 6781bb3..a7de775 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -64,6 +64,8 @@ distro==1.9.0 # via posthog durationpy==0.10 # via kubernetes +en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl + # via umapyai filelock==3.18.0 # via huggingface-hub # via torch @@ -100,6 +102,7 @@ httptools==0.6.4 # via uvicorn httpx==0.28.1 # via chromadb + # via ollama huggingface-hub==0.34.1 # via sentence-transformers # via tokenizers @@ -166,6 +169,8 @@ numpy==2.3.2 oauthlib==3.3.1 # via kubernetes # via requests-oauthlib +ollama==0.5.2 + # via umapyai onnxruntime==1.22.1 # via chromadb opentelemetry-api==1.35.0 @@ -222,6 +227,7 @@ pybase64==1.4.2 pydantic==2.11.7 # via chromadb # via confection + # via ollama # via spacy # via thinc # via weasel diff --git a/requirements.lock b/requirements.lock index bd090a9..f0d2f08 100644 --- a/requirements.lock +++ b/requirements.lock @@ -64,6 +64,8 @@ distro==1.9.0 # via posthog durationpy==0.10 # via kubernetes +en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl + # via umapyai filelock==3.18.0 # via huggingface-hub # via torch @@ -100,6 +102,7 @@ httptools==0.6.4 # via uvicorn httpx==0.28.1 # via chromadb + # via ollama huggingface-hub==0.34.1 # via sentence-transformers # via tokenizers @@ -166,6 +169,8 @@ numpy==2.3.2 oauthlib==3.3.1 # via kubernetes # via requests-oauthlib 
+ollama==0.5.2 + # via umapyai onnxruntime==1.22.1 # via chromadb opentelemetry-api==1.35.0 @@ -220,6 +225,7 @@ pybase64==1.4.2 pydantic==2.11.7 # via chromadb # via confection + # via ollama # via spacy # via thinc # via weasel diff --git a/src/umapyai/__init__.py b/src/umapyai/__init__.py index e8abda6..b157516 100644 --- a/src/umapyai/__init__.py +++ b/src/umapyai/__init__.py @@ -3,16 +3,15 @@ import sys import json import chromadb from sentence_transformers import SentenceTransformer -import requests +import ollama from loguru import logger from threading import Thread from flask import Flask, send_file from flask_cors import CORS from flask_sock import Sock from .constants import (ARTICLES_DIRECTORY, CHROMA_DIRECTORY, CHROMA_COLLECTION, - CHUNK_SIZE, EMBEDDING_MODEL, OLLAMA_MODEL, TOP_K, - OLLAMA_URL) -from .ollama import start_ollama_server, is_ollama_live, ensure_model_pulled, kill_ollama + CHUNK_SIZE, EMBEDDING_MODEL, OLLAMA_MODEL, TOP_K) +from .ollama_server import start_ollama_server, is_ollama_live, ensure_model_pulled, kill_ollama from collections import defaultdict from .language import clean_for_match, get_query_phrases @@ -185,32 +184,14 @@ def main(): return merged[:top_k] def query_ollama(prompt, context=None): - url = f"{OLLAMA_URL}/api/generate" - payload = { - "model": OLLAMA_MODEL, - "prompt": prompt, - "stream": True, - } - - if context: - payload["context"] = context - try: - response = requests.post(url, json=payload, stream=True) - - response.raise_for_status() - - for line in response.iter_lines(): - if line: - json_response = json.loads(line) + for chunk in ollama.generate( + model=OLLAMA_MODEL, prompt=prompt, stream=True, context=context): + if not chunk.get("done"): + yield {"type": "answer_chunk", "data": chunk.get("response", "")} + else: + yield {"type": "history", "data": chunk.get("context")} - if not json_response.get("done"): - yield { - "type": "answer_chunk", - "data": json_response.get("response", "") - } - else: - yield 
{"type": "history", "data": json_response.get("context")} except Exception as error: error_message = f"Error communicating with Ollama: {error}" diff --git a/src/umapyai/constants.py b/src/umapyai/constants.py index 207ef8b..01601a3 100644 --- a/src/umapyai/constants.py +++ b/src/umapyai/constants.py @@ -5,4 +5,3 @@ CHUNK_SIZE = 350 # words EMBEDDING_MODEL = "all-MiniLM-L6-v2" OLLAMA_MODEL = "qwen3:14b" TOP_K = 4 -OLLAMA_URL = "http://localhost:11434" diff --git a/src/umapyai/ollama.py b/src/umapyai/ollama_server.py index 73329be..9121880 100644 --- a/src/umapyai/ollama.py +++ b/src/umapyai/ollama_server.py @@ -1,18 +1,17 @@ -import requests import time import subprocess import psutil -from .constants import OLLAMA_URL import os from loguru import logger import sys +import ollama def is_ollama_live(): try: - response = requests.get(f"{OLLAMA_URL}/api/tags", timeout=2) + ollama.list() - return response.status_code == 200 + return True except Exception: return False @@ -57,13 +56,13 @@ def kill_ollama(process): def ensure_model_pulled(model): try: - tags = requests.get(f"{OLLAMA_URL}/api/tags").json().get("models", []) + tags = ollama.list().get("models", []) if not any(model in m.get("name", "") for m in tags): logger.info(f"Pulling model '{model}' ...") - subprocess.run(["ollama", "pull", model], check=True) + ollama.pull(model) else: logger.success(f"Model '{model}' already pulled.") except Exception as e: logger.warning(f"Couldn't check/pull Ollama model: {e}") - logger.warning("Proceeding anyway ...") + logger.warning("Proceeding anyway ...")
\ No newline at end of file