From f4b2cf41909c5c5ed87e3a206ada000b7560467f Mon Sep 17 00:00:00 2001 From: Fuwn Date: Wed, 6 Aug 2025 21:38:37 +0200 Subject: feat(umapyai): Switch to ollama library calls --- pyproject.toml | 2 ++ requirements-dev.lock | 6 ++++ requirements.lock | 6 ++++ src/umapyai/__init__.py | 37 ++++++------------------ src/umapyai/constants.py | 1 - src/umapyai/ollama.py | 69 -------------------------------------------- src/umapyai/ollama_server.py | 68 +++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 91 insertions(+), 98 deletions(-) delete mode 100644 src/umapyai/ollama.py create mode 100644 src/umapyai/ollama_server.py diff --git a/pyproject.toml b/pyproject.toml index d378722..d14954d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,8 @@ dependencies = [ "flask-cors>=6.0.1", "flask-sock>=0.7.0", "spacy>=3.8.7", + "ollama>=0.5.2", + "en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl", ] readme = "README.md" requires-python = ">= 3.8" diff --git a/requirements-dev.lock b/requirements-dev.lock index 6781bb3..a7de775 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -64,6 +64,8 @@ distro==1.9.0 # via posthog durationpy==0.10 # via kubernetes +en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl + # via umapyai filelock==3.18.0 # via huggingface-hub # via torch @@ -100,6 +102,7 @@ httptools==0.6.4 # via uvicorn httpx==0.28.1 # via chromadb + # via ollama huggingface-hub==0.34.1 # via sentence-transformers # via tokenizers @@ -166,6 +169,8 @@ numpy==2.3.2 oauthlib==3.3.1 # via kubernetes # via requests-oauthlib +ollama==0.5.2 + # via umapyai onnxruntime==1.22.1 # via chromadb opentelemetry-api==1.35.0 @@ -222,6 +227,7 @@ pybase64==1.4.2 pydantic==2.11.7 # via chromadb # via confection + # via ollama # via spacy # via thinc # via weasel diff --git a/requirements.lock b/requirements.lock index bd090a9..f0d2f08 100644 --- a/requirements.lock +++ b/requirements.lock @@ -64,6 +64,8 @@ distro==1.9.0 # via posthog durationpy==0.10 # via kubernetes +en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl + # via umapyai filelock==3.18.0 # via huggingface-hub # via torch @@ -100,6 +102,7 @@ httptools==0.6.4 # via uvicorn httpx==0.28.1 # via chromadb + # via ollama huggingface-hub==0.34.1 # via sentence-transformers # via tokenizers @@ -166,6 +169,8 @@ numpy==2.3.2 oauthlib==3.3.1 # via kubernetes # via requests-oauthlib +ollama==0.5.2 + # via umapyai onnxruntime==1.22.1 # via chromadb opentelemetry-api==1.35.0 @@ -220,6 +225,7 @@ pybase64==1.4.2 pydantic==2.11.7 # via chromadb # via confection + # via ollama # via spacy # via thinc # via weasel diff --git a/src/umapyai/__init__.py b/src/umapyai/__init__.py index e8abda6..b157516 100644 --- a/src/umapyai/__init__.py +++ b/src/umapyai/__init__.py @@ -3,16 +3,15 @@ import sys import json import chromadb from sentence_transformers import SentenceTransformer -import requests +import ollama from loguru import logger from threading import Thread from flask import Flask, send_file from flask_cors import CORS from flask_sock import Sock from .constants import (ARTICLES_DIRECTORY, CHROMA_DIRECTORY, CHROMA_COLLECTION, - CHUNK_SIZE, EMBEDDING_MODEL, OLLAMA_MODEL, TOP_K, - OLLAMA_URL) -from .ollama import start_ollama_server, is_ollama_live, ensure_model_pulled, kill_ollama + CHUNK_SIZE, EMBEDDING_MODEL, OLLAMA_MODEL, TOP_K) +from .ollama_server import start_ollama_server, is_ollama_live, ensure_model_pulled, kill_ollama from collections import defaultdict from .language import clean_for_match, get_query_phrases @@ -185,32 +184,14 @@ def main(): return merged[:top_k] def query_ollama(prompt, context=None): - url = f"{OLLAMA_URL}/api/generate" - payload = { - "model": OLLAMA_MODEL, - "prompt": prompt, - "stream": True, - } - - if context: - payload["context"] = context - try: - response = requests.post(url, json=payload, stream=True) - - response.raise_for_status() - - for line in response.iter_lines(): - if line: - json_response = json.loads(line) + for chunk in ollama.generate( + model=OLLAMA_MODEL, prompt=prompt, stream=True, context=context): + if not chunk.get("done"): + yield {"type": "answer_chunk", "data": chunk.get("response", "")} + else: + yield {"type": "history", "data": chunk.get("context")} - if not json_response.get("done"): - yield { - "type": "answer_chunk", - "data": json_response.get("response", "") - } - else: - yield {"type": "history", "data": json_response.get("context")} except Exception as error: error_message = f"Error communicating with Ollama: {error}" diff --git a/src/umapyai/constants.py b/src/umapyai/constants.py index 207ef8b..01601a3 100644 --- a/src/umapyai/constants.py +++ b/src/umapyai/constants.py @@ -5,4 +5,3 @@ CHUNK_SIZE = 350 # words EMBEDDING_MODEL = "all-MiniLM-L6-v2" OLLAMA_MODEL = "qwen3:14b" TOP_K = 4 -OLLAMA_URL = "http://localhost:11434" diff --git a/src/umapyai/ollama.py b/src/umapyai/ollama.py deleted file mode 100644 index 73329be..0000000 --- a/src/umapyai/ollama.py +++ /dev/null @@ -1,69 +0,0 @@ -import requests -import time -import subprocess -import psutil -from .constants import OLLAMA_URL -import os -from loguru import logger -import sys - - -def is_ollama_live(): - try: - response = requests.get(f"{OLLAMA_URL}/api/tags", timeout=2) - - return response.status_code == 200 - except Exception: - return False - - -def start_ollama_server(): - logger.info("Starting Ollama server with OLLAMA_ORIGINS='*' ...") - - environment = os.environ.copy() - environment["OLLAMA_ORIGINS"] = "*" - process = subprocess.Popen(["ollama", "serve"], - env=environment, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True) - - for _ in range(30): - if is_ollama_live(): - logger.success("Ollama is now live.") - - return process - - time.sleep(1) - - logger.error("Ollama server did not start after 30 seconds.") - process.terminate() - sys.exit(1) - - -def kill_ollama(process): - logger.info("Killing Ollama ...") - - try: - parent_process = psutil.Process(process.pid) - - for child_process in parent_process.children(recursive=True): - child_process.terminate() - - parent_process.terminate() - except Exception as error: - logger.error(f"Error killing Ollama: {error}") - - -def ensure_model_pulled(model): - try: - tags = requests.get(f"{OLLAMA_URL}/api/tags").json().get("models", []) - - if not any(model in m.get("name", "") for m in tags): - logger.info(f"Pulling model '{model}' ...") - subprocess.run(["ollama", "pull", model], check=True) - else: - logger.success(f"Model '{model}' already pulled.") - except Exception as e: - logger.warning(f"Couldn't check/pull Ollama model: {e}") - logger.warning("Proceeding anyway ...") diff --git a/src/umapyai/ollama_server.py b/src/umapyai/ollama_server.py new file mode 100644 index 0000000..9121880 --- /dev/null +++ b/src/umapyai/ollama_server.py @@ -0,0 +1,68 @@ +import time +import subprocess +import psutil +import os +from loguru import logger +import sys +import ollama + + +def is_ollama_live(): + try: + ollama.list() + + return True + except Exception: + return False + + +def start_ollama_server(): + logger.info("Starting Ollama server with OLLAMA_ORIGINS='*' ...") + + environment = os.environ.copy() + environment["OLLAMA_ORIGINS"] = "*" + process = subprocess.Popen(["ollama", "serve"], + env=environment, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True) + + for _ in range(30): + if is_ollama_live(): + logger.success("Ollama is now live.") + + return process + + time.sleep(1) + + logger.error("Ollama server did not start after 30 seconds.") + process.terminate() + sys.exit(1) + + +def kill_ollama(process): + logger.info("Killing Ollama ...") + + try: + parent_process = psutil.Process(process.pid) + + for child_process in parent_process.children(recursive=True): + child_process.terminate() + + parent_process.terminate() + except Exception as error: + logger.error(f"Error killing Ollama: {error}") + + +def ensure_model_pulled(model): + try: + tags = ollama.list().get("models", []) + + if not any(model in m.get("name", "") for m in tags): + logger.info(f"Pulling model '{model}' ...") + ollama.pull(model) + else: + logger.success(f"Model '{model}' already pulled.") + except Exception as e: + logger.warning(f"Couldn't check/pull Ollama model: {e}") + logger.warning("Proceeding anyway ...") \ No newline at end of file -- cgit v1.2.3