diff options
| author | Fuwn <[email protected]> | 2025-08-17 01:14:56 +0200 |
|---|---|---|
| committer | Fuwn <[email protected]> | 2025-08-17 01:14:56 +0200 |
| commit | 2745606556afb972875d7c4fc818d78b6181ab08 (patch) | |
| tree | b5dff62a51db96c08f455f778a792f3680283a99 | |
| parent | feat(html): Add Markdown rendering (diff) | |
| download | umapyai-main.tar.xz umapyai-main.zip | |
| -rw-r--r-- | .gitignore | 1 | ||||
| -rw-r--r-- | justfile | 4 | ||||
| -rw-r--r-- | pyproject.toml | 6 | ||||
| -rw-r--r-- | requirements-dev.lock | 193 | ||||
| -rw-r--r-- | requirements.lock | 194 | ||||
| -rw-r--r-- | src/umapyai_alternative/__init__.py | 96 | ||||
| -rw-r--r-- | src/umapyai_alternative/__main__.py | 4 |
7 files changed, 494 insertions, 4 deletions
@@ -12,3 +12,4 @@ wheels/ # umapyai chromadb uma_articles_* +qdrant_data @@ -2,6 +2,7 @@ project := "umapyai" alias gen := generate alias fmt := format +alias alt := run_alternative generate target="client": rye run ariadne-codegen {{ target }} @@ -9,6 +10,9 @@ generate target="client": run *arguments: rye run {{ project }} {{ arguments }} +run_alternative *arguments: + rye run {{ project }}_alternative {{ arguments }} + scrape *arguments: rye run article_scraper {{ arguments }} diff --git a/pyproject.toml b/pyproject.toml index d14954d..1cc0f0b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,11 @@ dependencies = [ "spacy>=3.8.7", "ollama>=0.5.2", "en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl", + "llama-index>=0.13.2", + "qdrant-client>=1.15.1", + "llama-index-llms-ollama>=0.7.1", + "llama-index-vector-stores-qdrant>=0.7.1", + "llama-index-embeddings-huggingface>=0.6.0", ] readme = "README.md" requires-python = ">= 3.8" @@ -23,6 +28,7 @@ requires-python = ">= 3.8" [project.scripts] "umapyai" = "umapyai:main" "article_scraper" = "article_scraper:main" +"umapyai_alternative" = "umapyai_alternative:main" [build-system] requires = ["hatchling"] diff --git a/requirements-dev.lock b/requirements-dev.lock index a7de775..3f9c68a 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -10,19 +10,33 @@ # universal: false -e file:. +aiohappyeyeballs==2.6.1 + # via aiohttp +aiohttp==3.12.15 + # via huggingface-hub + # via llama-index-core +aiosignal==1.4.0 + # via aiohttp +aiosqlite==0.21.0 + # via llama-index-core annotated-types==0.7.0 # via pydantic anyio==4.9.0 # via httpx + # via openai # via watchfiles attrs==25.3.0 + # via aiohttp # via jsonschema # via referencing backoff==2.2.1 # via posthog +banks==2.2.0 + # via llama-index-core bcrypt==4.3.0 # via chromadb beautifulsoup4==4.13.4 + # via llama-index-readers-file # via umapyai blinker==1.9.0 # via flask @@ -40,6 +54,7 @@ certifi==2025.7.14 # via httpcore # via httpx # via kubernetes + # via llama-cloud # via requests charset-normalizer==3.4.2 # via requests @@ -47,10 +62,14 @@ chromadb==1.0.15 # via umapyai click==8.2.1 # via flask + # via llama-cloud-services + # via nltk # via typer # via uvicorn cloudpathlib==0.21.1 # via weasel +colorama==0.4.6 + # via griffe coloredlogs==15.0.1 # via onnxruntime confection==0.1.5 @@ -60,7 +79,19 @@ cymem==2.0.11 # via preshed # via spacy # via thinc +dataclasses-json==0.6.7 + # via llama-index-core +defusedxml==0.7.1 + # via llama-index-readers-file +deprecated==1.2.18 + # via banks + # via llama-index-core + # via llama-index-indices-managed-llama-cloud + # via llama-index-instrumentation +dirtyjson==1.0.8 + # via llama-index-core distro==1.9.0 + # via openai # via posthog durationpy==0.10 # via kubernetes @@ -70,6 +101,8 @@ filelock==3.18.0 # via huggingface-hub # via torch # via transformers +filetype==1.2.0 + # via llama-index-core flask==3.1.1 # via flask-cors # via flask-sock @@ -80,39 +113,61 @@ flask-sock==0.7.0 # via umapyai flatbuffers==25.2.10 # via onnxruntime +frozenlist==1.7.0 + # via aiohttp + # via aiosignal fsspec==2025.7.0 # via huggingface-hub + # via llama-index-core # via torch google-auth==2.40.3 # via kubernetes googleapis-common-protos==1.70.0 # via opentelemetry-exporter-otlp-proto-grpc +greenlet==3.2.4 + # via sqlalchemy +griffe==1.12.1 + # via banks grpcio==1.74.0 # via chromadb + # via llama-index-vector-stores-qdrant # via opentelemetry-exporter-otlp-proto-grpc + # via qdrant-client h11==0.16.0 # via httpcore # via uvicorn # via wsproto +h2==4.2.0 + # via httpx hf-xet==1.1.5 # via huggingface-hub +hpack==4.1.0 + # via h2 httpcore==1.0.9 # via httpx httptools==0.6.4 # via uvicorn httpx==0.28.1 # via chromadb + # via llama-cloud + # via llama-index-core # via ollama + # via openai + # via qdrant-client huggingface-hub==0.34.1 + # via llama-index-embeddings-huggingface # via sentence-transformers # via tokenizers # via transformers humanfriendly==10.0 # via coloredlogs +hyperframe==6.1.0 + # via h2 idna==3.10 # via anyio # via httpx # via requests + # via yarl importlib-metadata==8.7.0 # via opentelemetry-api importlib-resources==6.5.2 @@ -120,10 +175,14 @@ importlib-resources==6.5.2 itsdangerous==2.2.0 # via flask jinja2==3.1.6 + # via banks # via flask # via spacy # via torch +jiter==0.10.0 + # via openai joblib==1.5.1 + # via nltk # via scikit-learn jsonschema==4.25.0 # via chromadb @@ -135,6 +194,51 @@ langcodes==3.5.0 # via spacy language-data==1.3.0 # via langcodes +llama-cloud==0.1.35 + # via llama-cloud-services + # via llama-index-indices-managed-llama-cloud +llama-cloud-services==0.6.54 + # via llama-parse +llama-index==0.13.2 + # via umapyai +llama-index-cli==0.5.0 + # via llama-index +llama-index-core==0.13.2 + # via llama-cloud-services + # via llama-index + # via llama-index-cli + # via llama-index-embeddings-huggingface + # via llama-index-embeddings-openai + # via llama-index-indices-managed-llama-cloud + # via llama-index-llms-ollama + # via llama-index-llms-openai + # via llama-index-readers-file + # via llama-index-readers-llama-parse + # via llama-index-vector-stores-qdrant +llama-index-embeddings-huggingface==0.6.0 + # via umapyai +llama-index-embeddings-openai==0.5.0 + # via llama-index + # via llama-index-cli +llama-index-indices-managed-llama-cloud==0.9.1 + # via llama-index +llama-index-instrumentation==0.4.0 + # via llama-index-workflows +llama-index-llms-ollama==0.7.1 + # via umapyai +llama-index-llms-openai==0.5.4 + # via llama-index + # via llama-index-cli +llama-index-readers-file==0.5.1 + # via llama-index +llama-index-readers-llama-parse==0.5.0 + # via llama-index +llama-index-vector-stores-qdrant==0.7.1 + # via umapyai +llama-index-workflows==1.3.0 + # via llama-index-core +llama-parse==0.6.54 + # via llama-index-readers-llama-parse loguru==0.7.3 # via umapyai marisa-trie==1.2.1 @@ -145,22 +249,38 @@ markupsafe==3.0.2 # via flask # via jinja2 # via werkzeug +marshmallow==3.26.1 + # via dataclasses-json mdurl==0.1.2 # via markdown-it-py mmh3==5.1.0 # via chromadb mpmath==1.3.0 # via sympy +multidict==6.6.4 + # via aiohttp + # via yarl murmurhash==1.0.13 # via preshed # via spacy # via thinc +mypy-extensions==1.1.0 + # via typing-inspect +nest-asyncio==1.6.0 + # via llama-index-core networkx==3.5 + # via llama-index-core # via torch +nltk==3.9.1 + # via llama-index + # via llama-index-core numpy==2.3.2 # via blis # via chromadb + # via llama-index-core # via onnxruntime + # via pandas + # via qdrant-client # via scikit-learn # via scipy # via spacy @@ -170,9 +290,13 @@ oauthlib==3.3.1 # via kubernetes # via requests-oauthlib ollama==0.5.2 + # via llama-index-llms-ollama # via umapyai onnxruntime==1.22.1 # via chromadb +openai==1.99.9 + # via llama-index-embeddings-openai + # via llama-index-llms-openai opentelemetry-api==1.35.0 # via chromadb # via opentelemetry-exporter-otlp-proto-grpc @@ -197,24 +321,37 @@ overrides==7.7.0 packaging==25.0 # via build # via huggingface-hub + # via marshmallow # via onnxruntime # via spacy # via thinc # via transformers # via weasel +pandas==2.2.3 + # via llama-index-readers-file pillow==11.3.0 + # via llama-index-core # via sentence-transformers platformdirs==4.3.8 + # via banks + # via llama-cloud-services + # via llama-index-core # via yapf +portalocker==3.2.0 + # via qdrant-client posthog==5.4.0 # via chromadb preshed==3.0.10 # via spacy # via thinc +propcache==0.3.2 + # via aiohttp + # via yarl protobuf==6.31.1 # via googleapis-common-protos # via onnxruntime # via opentelemetry-proto + # via qdrant-client psutil==7.0.0 # via umapyai pyasn1==0.6.1 @@ -225,9 +362,17 @@ pyasn1-modules==0.4.2 pybase64==1.4.2 # via chromadb pydantic==2.11.7 + # via banks # via chromadb # via confection + # via llama-cloud + # via llama-cloud-services + # via llama-index-core + # via llama-index-instrumentation + # via llama-index-workflows # via ollama + # via openai + # via qdrant-client # via spacy # via thinc # via weasel @@ -235,32 +380,46 @@ pydantic-core==2.33.2 # via pydantic pygments==2.19.2 # via rich +pypdf==5.9.0 + # via llama-index-readers-file pypika==0.48.9 # via chromadb pyproject-hooks==1.2.0 # via build python-dateutil==2.9.0.post0 # via kubernetes + # via pandas # via posthog python-dotenv==1.1.1 + # via llama-cloud-services # via uvicorn +pytz==2025.2 + # via pandas pyyaml==6.0.2 # via chromadb # via huggingface-hub # via kubernetes + # via llama-index-core # via transformers # via uvicorn +qdrant-client==1.15.1 + # via llama-index-vector-stores-qdrant + # via umapyai referencing==0.36.2 # via jsonschema # via jsonschema-specifications regex==2024.11.6 + # via nltk + # via tiktoken # via transformers requests==2.32.4 # via huggingface-hub # via kubernetes + # via llama-index-core # via posthog # via requests-oauthlib # via spacy + # via tiktoken # via transformers # via umapyai # via weasel @@ -283,8 +442,10 @@ scipy==1.16.1 # via scikit-learn # via sentence-transformers sentence-transformers==5.0.0 + # via llama-index-embeddings-huggingface # via umapyai setuptools==80.9.0 + # via llama-index-core # via marisa-trie # via spacy # via thinc @@ -301,6 +462,7 @@ smart-open==7.3.0.post1 # via weasel sniffio==1.3.1 # via anyio + # via openai soupsieve==2.7 # via beautifulsoup4 spacy==3.8.7 @@ -309,41 +471,56 @@ spacy-legacy==3.0.12 # via spacy spacy-loggers==1.0.5 # via spacy +sqlalchemy==2.0.43 + # via llama-index-core srsly==2.5.1 # via confection # via spacy # via thinc # via weasel +striprtf==0.0.26 + # via llama-index-readers-file sympy==1.14.0 # via onnxruntime # via torch tenacity==9.1.2 # via chromadb + # via llama-cloud-services + # via llama-index-core thinc==8.3.6 # via spacy threadpoolctl==3.6.0 # via scikit-learn +tiktoken==0.11.0 + # via llama-index-core tokenizers==0.21.2 # via chromadb # via transformers -torch==2.7.1 +torch==2.8.0 # via sentence-transformers tqdm==4.67.1 # via chromadb # via huggingface-hub + # via llama-index-core + # via nltk + # via openai # via sentence-transformers # via spacy # via transformers -transformers==4.54.0 +transformers==4.55.2 # via sentence-transformers typer==0.16.0 # via chromadb # via spacy # via weasel typing-extensions==4.14.1 + # via aiosqlite # via beautifulsoup4 # via chromadb # via huggingface-hub + # via llama-index-core + # via llama-index-workflows + # via openai # via opentelemetry-api # via opentelemetry-exporter-otlp-proto-grpc # via opentelemetry-sdk @@ -351,13 +528,21 @@ typing-extensions==4.14.1 # via pydantic # via pydantic-core # via sentence-transformers + # via sqlalchemy # via torch # via typer + # via typing-inspect # via typing-inspection +typing-inspect==0.9.0 + # via dataclasses-json + # via llama-index-core typing-inspection==0.4.1 # via pydantic +tzdata==2025.2 + # via pandas urllib3==2.5.0 # via kubernetes + # via qdrant-client # via requests uvicorn==0.35.0 # via chromadb @@ -379,9 +564,13 @@ werkzeug==3.1.3 # via flask # via flask-cors wrapt==1.17.2 + # via deprecated + # via llama-index-core # via smart-open wsproto==1.2.0 # via simple-websocket yapf==0.43.0 +yarl==1.20.1 + # via aiohttp zipp==3.23.0 # via importlib-metadata diff --git a/requirements.lock b/requirements.lock index f0d2f08..80be97b 100644 --- a/requirements.lock +++ b/requirements.lock @@ -10,19 +10,33 @@ # universal: false -e file:. +aiohappyeyeballs==2.6.1 + # via aiohttp +aiohttp==3.12.15 + # via huggingface-hub + # via llama-index-core +aiosignal==1.4.0 + # via aiohttp +aiosqlite==0.21.0 + # via llama-index-core annotated-types==0.7.0 # via pydantic anyio==4.9.0 # via httpx + # via openai # via watchfiles attrs==25.3.0 + # via aiohttp # via jsonschema # via referencing backoff==2.2.1 # via posthog +banks==2.2.0 + # via llama-index-core bcrypt==4.3.0 # via chromadb beautifulsoup4==4.13.4 + # via llama-index-readers-file # via umapyai blinker==1.9.0 # via flask @@ -40,6 +54,7 @@ certifi==2025.7.14 # via httpcore # via httpx # via kubernetes + # via llama-cloud # via requests charset-normalizer==3.4.2 # via requests @@ -47,10 +62,14 @@ chromadb==1.0.15 # via umapyai click==8.2.1 # via flask + # via llama-cloud-services + # via nltk # via typer # via uvicorn cloudpathlib==0.21.1 # via weasel +colorama==0.4.6 + # via griffe coloredlogs==15.0.1 # via onnxruntime confection==0.1.5 @@ -60,7 +79,19 @@ cymem==2.0.11 # via preshed # via spacy # via thinc +dataclasses-json==0.6.7 + # via llama-index-core +defusedxml==0.7.1 + # via llama-index-readers-file +deprecated==1.2.18 + # via banks + # via llama-index-core + # via llama-index-indices-managed-llama-cloud + # via llama-index-instrumentation +dirtyjson==1.0.8 + # via llama-index-core distro==1.9.0 + # via openai # via posthog durationpy==0.10 # via kubernetes @@ -70,6 +101,8 @@ filelock==3.18.0 # via huggingface-hub # via torch # via transformers +filetype==1.2.0 + # via llama-index-core flask==3.1.1 # via flask-cors # via flask-sock @@ -80,39 +113,61 @@ flask-sock==0.7.0 # via umapyai flatbuffers==25.2.10 # via onnxruntime +frozenlist==1.7.0 + # via aiohttp + # via aiosignal fsspec==2025.7.0 # via huggingface-hub + # via llama-index-core # via torch google-auth==2.40.3 # via kubernetes googleapis-common-protos==1.70.0 # via opentelemetry-exporter-otlp-proto-grpc +greenlet==3.2.4 + # via sqlalchemy +griffe==1.12.1 + # via banks grpcio==1.74.0 # via chromadb + # via llama-index-vector-stores-qdrant # via opentelemetry-exporter-otlp-proto-grpc + # via qdrant-client h11==0.16.0 # via httpcore # via uvicorn # via wsproto +h2==4.2.0 + # via httpx hf-xet==1.1.5 # via huggingface-hub +hpack==4.1.0 + # via h2 httpcore==1.0.9 # via httpx httptools==0.6.4 # via uvicorn httpx==0.28.1 # via chromadb + # via llama-cloud + # via llama-index-core # via ollama + # via openai + # via qdrant-client huggingface-hub==0.34.1 + # via llama-index-embeddings-huggingface # via sentence-transformers # via tokenizers # via transformers humanfriendly==10.0 # via coloredlogs +hyperframe==6.1.0 + # via h2 idna==3.10 # via anyio # via httpx # via requests + # via yarl importlib-metadata==8.7.0 # via opentelemetry-api importlib-resources==6.5.2 @@ -120,10 +175,14 @@ importlib-resources==6.5.2 itsdangerous==2.2.0 # via flask jinja2==3.1.6 + # via banks # via flask # via spacy # via torch +jiter==0.10.0 + # via openai joblib==1.5.1 + # via nltk # via scikit-learn jsonschema==4.25.0 # via chromadb @@ -135,6 +194,51 @@ langcodes==3.5.0 # via spacy language-data==1.3.0 # via langcodes +llama-cloud==0.1.35 + # via llama-cloud-services + # via llama-index-indices-managed-llama-cloud +llama-cloud-services==0.6.54 + # via llama-parse +llama-index==0.13.2 + # via umapyai +llama-index-cli==0.5.0 + # via llama-index +llama-index-core==0.13.2 + # via llama-cloud-services + # via llama-index + # via llama-index-cli + # via llama-index-embeddings-huggingface + # via llama-index-embeddings-openai + # via llama-index-indices-managed-llama-cloud + # via llama-index-llms-ollama + # via llama-index-llms-openai + # via llama-index-readers-file + # via llama-index-readers-llama-parse + # via llama-index-vector-stores-qdrant +llama-index-embeddings-huggingface==0.6.0 + # via umapyai +llama-index-embeddings-openai==0.5.0 + # via llama-index + # via llama-index-cli +llama-index-indices-managed-llama-cloud==0.9.1 + # via llama-index +llama-index-instrumentation==0.4.0 + # via llama-index-workflows +llama-index-llms-ollama==0.7.1 + # via umapyai +llama-index-llms-openai==0.5.4 + # via llama-index + # via llama-index-cli +llama-index-readers-file==0.5.1 + # via llama-index +llama-index-readers-llama-parse==0.5.0 + # via llama-index +llama-index-vector-stores-qdrant==0.7.1 + # via umapyai +llama-index-workflows==1.3.0 + # via llama-index-core +llama-parse==0.6.54 + # via llama-index-readers-llama-parse loguru==0.7.3 # via umapyai marisa-trie==1.2.1 @@ -145,22 +249,38 @@ markupsafe==3.0.2 # via flask # via jinja2 # via werkzeug +marshmallow==3.26.1 + # via dataclasses-json mdurl==0.1.2 # via markdown-it-py mmh3==5.1.0 # via chromadb mpmath==1.3.0 # via sympy +multidict==6.6.4 + # via aiohttp + # via yarl murmurhash==1.0.13 # via preshed # via spacy # via thinc +mypy-extensions==1.1.0 + # via typing-inspect +nest-asyncio==1.6.0 + # via llama-index-core networkx==3.5 + # via llama-index-core # via torch +nltk==3.9.1 + # via llama-index + # via llama-index-core numpy==2.3.2 # via blis # via chromadb + # via llama-index-core # via onnxruntime + # via pandas + # via qdrant-client # via scikit-learn # via scipy # via spacy @@ -170,9 +290,13 @@ oauthlib==3.3.1 # via kubernetes # via requests-oauthlib ollama==0.5.2 + # via llama-index-llms-ollama # via umapyai onnxruntime==1.22.1 # via chromadb +openai==1.99.9 + # via llama-index-embeddings-openai + # via llama-index-llms-openai opentelemetry-api==1.35.0 # via chromadb # via opentelemetry-exporter-otlp-proto-grpc @@ -197,22 +321,36 @@ overrides==7.7.0 packaging==25.0 # via build # via huggingface-hub + # via marshmallow # via onnxruntime # via spacy # via thinc # via transformers # via weasel +pandas==2.2.3 + # via llama-index-readers-file pillow==11.3.0 + # via llama-index-core # via sentence-transformers +platformdirs==4.3.8 + # via banks + # via llama-cloud-services + # via llama-index-core +portalocker==3.2.0 + # via qdrant-client posthog==5.4.0 # via chromadb preshed==3.0.10 # via spacy # via thinc +propcache==0.3.2 + # via aiohttp + # via yarl protobuf==6.31.1 # via googleapis-common-protos # via onnxruntime # via opentelemetry-proto + # via qdrant-client psutil==7.0.0 # via umapyai pyasn1==0.6.1 @@ -223,9 +361,17 @@ pyasn1-modules==0.4.2 pybase64==1.4.2 # via chromadb pydantic==2.11.7 + # via banks # via chromadb # via confection + # via llama-cloud + # via llama-cloud-services + # via llama-index-core + # via llama-index-instrumentation + # via llama-index-workflows # via ollama + # via openai + # via qdrant-client # via spacy # via thinc # via weasel @@ -233,32 +379,46 @@ pydantic-core==2.33.2 # via pydantic pygments==2.19.2 # via rich +pypdf==5.9.0 + # via llama-index-readers-file pypika==0.48.9 # via chromadb pyproject-hooks==1.2.0 # via build python-dateutil==2.9.0.post0 # via kubernetes + # via pandas # via posthog python-dotenv==1.1.1 + # via llama-cloud-services # via uvicorn +pytz==2025.2 + # via pandas pyyaml==6.0.2 # via chromadb # via huggingface-hub # via kubernetes + # via llama-index-core # via transformers # via uvicorn +qdrant-client==1.15.1 + # via llama-index-vector-stores-qdrant + # via umapyai referencing==0.36.2 # via jsonschema # via jsonschema-specifications regex==2024.11.6 + # via nltk + # via tiktoken # via transformers requests==2.32.4 # via huggingface-hub # via kubernetes + # via llama-index-core # via posthog # via requests-oauthlib # via spacy + # via tiktoken # via transformers # via umapyai # via weasel @@ -280,8 +440,10 @@ scipy==1.16.1 # via scikit-learn # via sentence-transformers sentence-transformers==5.0.0 + # via llama-index-embeddings-huggingface # via umapyai setuptools==80.9.0 + # via llama-index-core # via marisa-trie # via spacy # via thinc @@ -298,6 +460,7 @@ smart-open==7.3.0.post1 # via weasel sniffio==1.3.1 # via anyio + # via openai soupsieve==2.7 # via beautifulsoup4 spacy==3.8.7 @@ -306,41 +469,56 @@ spacy-legacy==3.0.12 # via spacy spacy-loggers==1.0.5 # via spacy +sqlalchemy==2.0.43 + # via llama-index-core srsly==2.5.1 # via confection # via spacy # via thinc # via weasel +striprtf==0.0.26 + # via llama-index-readers-file sympy==1.14.0 # via onnxruntime # via torch tenacity==9.1.2 # via chromadb + # via llama-cloud-services + # via llama-index-core thinc==8.3.6 # via spacy threadpoolctl==3.6.0 # via scikit-learn +tiktoken==0.11.0 + # via llama-index-core tokenizers==0.21.2 # via chromadb # via transformers -torch==2.7.1 +torch==2.8.0 # via sentence-transformers tqdm==4.67.1 # via chromadb # via huggingface-hub + # via llama-index-core + # via nltk + # via openai # via sentence-transformers # via spacy # via transformers -transformers==4.54.0 +transformers==4.55.2 # via sentence-transformers typer==0.16.0 # via chromadb # via spacy # via weasel typing-extensions==4.14.1 + # via aiosqlite # via beautifulsoup4 # via chromadb # via huggingface-hub + # via llama-index-core + # via llama-index-workflows + # via openai # via opentelemetry-api # via opentelemetry-exporter-otlp-proto-grpc # via opentelemetry-sdk @@ -348,13 +526,21 @@ typing-extensions==4.14.1 # via pydantic # via pydantic-core # via sentence-transformers + # via sqlalchemy # via torch # via typer + # via typing-inspect # via typing-inspection +typing-inspect==0.9.0 + # via dataclasses-json + # via llama-index-core typing-inspection==0.4.1 # via pydantic +tzdata==2025.2 + # via pandas urllib3==2.5.0 # via kubernetes + # via qdrant-client # via requests uvicorn==0.35.0 # via chromadb @@ -376,8 +562,12 @@ werkzeug==3.1.3 # via flask # via flask-cors wrapt==1.17.2 + # via deprecated + # via llama-index-core # via smart-open wsproto==1.2.0 # via simple-websocket +yarl==1.20.1 + # via aiohttp zipp==3.23.0 # via importlib-metadata diff --git a/src/umapyai_alternative/__init__.py b/src/umapyai_alternative/__init__.py new file mode 100644 index 0000000..9e613a1 --- /dev/null +++ b/src/umapyai_alternative/__init__.py @@ -0,0 +1,96 @@ +import qdrant_client +from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings +from llama_index.core.storage import StorageContext +from llama_index.llms.ollama import Ollama +from llama_index.embeddings.huggingface import HuggingFaceEmbedding +from llama_index.vector_stores.qdrant import QdrantVectorStore +import logging +import threading +import sys +import time + + +def _wait_animation(stop_event: threading.Event, interval: float = 0.35): + frames = ["", ".", "..", "..."] + max_length = max(len(frame) for frame in frames) + index = 0 + + while not stop_event.is_set(): + frame = frames[index % len(frames)] + + sys.stdout.write("\r" + frame + " " * (max_length - len(frame))) + + index += 1 + + time.sleep(interval) + + sys.stdout.write("\r" + " " * max_length + "\r") + sys.stdout.flush() + + +def main(): + logging.basicConfig(level=logging.WARNING) + + documents = SimpleDirectoryReader("./uma_articles_clean").load_data() + client = qdrant_client.QdrantClient(path="./qdrant_data") + vector_store = QdrantVectorStore(client=client, collection_name="umamusume") + storage_context = StorageContext.from_defaults(vector_store=vector_store) + llm = Ollama(model="gpt-oss:20b", request_timeout=120.0) + embedding_model = HuggingFaceEmbedding( + model_name="sentence-transformers/all-MiniLM-L6-v2") + + Settings.llm = llm + Settings.embed_model = embedding_model + + index = VectorStoreIndex.from_documents( + documents, + storage_context=storage_context, + ) + query_engine = index.as_query_engine(streaming=True) + + try: + while True: + try: + user_query = input("> ").strip() + except EOFError: + print() + + break + + if not user_query: + continue + + if user_query.lower() in {"quit", "exit", "q"}: + break + + response_stream = query_engine.query(user_query) + stop_event = threading.Event() + + print() + + animation_thread = threading.Thread( + target=_wait_animation, args=(stop_event,), daemon=True) + + animation_thread.start() + + got_first_token = False + + try: + for token in response_stream.response_gen: + if not got_first_token: + stop_event.set() + animation_thread.join() + + got_first_token = True + + print(token, end="", flush=True) + finally: + if not got_first_token: + stop_event.set() + animation_thread.join() + + print("\n") + except KeyboardInterrupt: + print() + + print() diff --git a/src/umapyai_alternative/__main__.py b/src/umapyai_alternative/__main__.py new file mode 100644 index 0000000..1680fe8 --- /dev/null +++ b/src/umapyai_alternative/__main__.py @@ -0,0 +1,4 @@ +import umapyai_alternative +import sys + +sys.exit(umapyai_alternative.main()) |