aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- .gitignore 1
-rw-r--r-- justfile 4
-rw-r--r-- pyproject.toml 6
-rw-r--r-- requirements-dev.lock 193
-rw-r--r-- requirements.lock 194
-rw-r--r-- src/umapyai_alternative/__init__.py 96
-rw-r--r-- src/umapyai_alternative/__main__.py 4
7 files changed, 494 insertions, 4 deletions
diff --git a/.gitignore b/.gitignore
index fa9041a..78f8ca9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,3 +12,4 @@ wheels/
# umapyai
chromadb
uma_articles_*
+qdrant_data
diff --git a/justfile b/justfile
index b23d891..70a091b 100644
--- a/justfile
+++ b/justfile
@@ -2,6 +2,7 @@ project := "umapyai"
alias gen := generate
alias fmt := format
+alias alt := run_alternative
generate target="client":
rye run ariadne-codegen {{ target }}
@@ -9,6 +10,9 @@ generate target="client":
run *arguments:
rye run {{ project }} {{ arguments }}
+run_alternative *arguments:
+ rye run {{ project }}_alternative {{ arguments }}
+
scrape *arguments:
rye run article_scraper {{ arguments }}
diff --git a/pyproject.toml b/pyproject.toml
index d14954d..1cc0f0b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,6 +16,11 @@ dependencies = [
"spacy>=3.8.7",
"ollama>=0.5.2",
"en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl",
+ "llama-index>=0.13.2",
+ "qdrant-client>=1.15.1",
+ "llama-index-llms-ollama>=0.7.1",
+ "llama-index-vector-stores-qdrant>=0.7.1",
+ "llama-index-embeddings-huggingface>=0.6.0",
]
readme = "README.md"
requires-python = ">= 3.8"
@@ -23,6 +28,7 @@ requires-python = ">= 3.8"
[project.scripts]
"umapyai" = "umapyai:main"
"article_scraper" = "article_scraper:main"
+"umapyai_alternative" = "umapyai_alternative:main"
[build-system]
requires = ["hatchling"]
diff --git a/requirements-dev.lock b/requirements-dev.lock
index a7de775..3f9c68a 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -10,19 +10,33 @@
# universal: false
-e file:.
+aiohappyeyeballs==2.6.1
+ # via aiohttp
+aiohttp==3.12.15
+ # via huggingface-hub
+ # via llama-index-core
+aiosignal==1.4.0
+ # via aiohttp
+aiosqlite==0.21.0
+ # via llama-index-core
annotated-types==0.7.0
# via pydantic
anyio==4.9.0
# via httpx
+ # via openai
# via watchfiles
attrs==25.3.0
+ # via aiohttp
# via jsonschema
# via referencing
backoff==2.2.1
# via posthog
+banks==2.2.0
+ # via llama-index-core
bcrypt==4.3.0
# via chromadb
beautifulsoup4==4.13.4
+ # via llama-index-readers-file
# via umapyai
blinker==1.9.0
# via flask
@@ -40,6 +54,7 @@ certifi==2025.7.14
# via httpcore
# via httpx
# via kubernetes
+ # via llama-cloud
# via requests
charset-normalizer==3.4.2
# via requests
@@ -47,10 +62,14 @@ chromadb==1.0.15
# via umapyai
click==8.2.1
# via flask
+ # via llama-cloud-services
+ # via nltk
# via typer
# via uvicorn
cloudpathlib==0.21.1
# via weasel
+colorama==0.4.6
+ # via griffe
coloredlogs==15.0.1
# via onnxruntime
confection==0.1.5
@@ -60,7 +79,19 @@ cymem==2.0.11
# via preshed
# via spacy
# via thinc
+dataclasses-json==0.6.7
+ # via llama-index-core
+defusedxml==0.7.1
+ # via llama-index-readers-file
+deprecated==1.2.18
+ # via banks
+ # via llama-index-core
+ # via llama-index-indices-managed-llama-cloud
+ # via llama-index-instrumentation
+dirtyjson==1.0.8
+ # via llama-index-core
distro==1.9.0
+ # via openai
# via posthog
durationpy==0.10
# via kubernetes
@@ -70,6 +101,8 @@ filelock==3.18.0
# via huggingface-hub
# via torch
# via transformers
+filetype==1.2.0
+ # via llama-index-core
flask==3.1.1
# via flask-cors
# via flask-sock
@@ -80,39 +113,61 @@ flask-sock==0.7.0
# via umapyai
flatbuffers==25.2.10
# via onnxruntime
+frozenlist==1.7.0
+ # via aiohttp
+ # via aiosignal
fsspec==2025.7.0
# via huggingface-hub
+ # via llama-index-core
# via torch
google-auth==2.40.3
# via kubernetes
googleapis-common-protos==1.70.0
# via opentelemetry-exporter-otlp-proto-grpc
+greenlet==3.2.4
+ # via sqlalchemy
+griffe==1.12.1
+ # via banks
grpcio==1.74.0
# via chromadb
+ # via llama-index-vector-stores-qdrant
# via opentelemetry-exporter-otlp-proto-grpc
+ # via qdrant-client
h11==0.16.0
# via httpcore
# via uvicorn
# via wsproto
+h2==4.2.0
+ # via httpx
hf-xet==1.1.5
# via huggingface-hub
+hpack==4.1.0
+ # via h2
httpcore==1.0.9
# via httpx
httptools==0.6.4
# via uvicorn
httpx==0.28.1
# via chromadb
+ # via llama-cloud
+ # via llama-index-core
# via ollama
+ # via openai
+ # via qdrant-client
huggingface-hub==0.34.1
+ # via llama-index-embeddings-huggingface
# via sentence-transformers
# via tokenizers
# via transformers
humanfriendly==10.0
# via coloredlogs
+hyperframe==6.1.0
+ # via h2
idna==3.10
# via anyio
# via httpx
# via requests
+ # via yarl
importlib-metadata==8.7.0
# via opentelemetry-api
importlib-resources==6.5.2
@@ -120,10 +175,14 @@ importlib-resources==6.5.2
itsdangerous==2.2.0
# via flask
jinja2==3.1.6
+ # via banks
# via flask
# via spacy
# via torch
+jiter==0.10.0
+ # via openai
joblib==1.5.1
+ # via nltk
# via scikit-learn
jsonschema==4.25.0
# via chromadb
@@ -135,6 +194,51 @@ langcodes==3.5.0
# via spacy
language-data==1.3.0
# via langcodes
+llama-cloud==0.1.35
+ # via llama-cloud-services
+ # via llama-index-indices-managed-llama-cloud
+llama-cloud-services==0.6.54
+ # via llama-parse
+llama-index==0.13.2
+ # via umapyai
+llama-index-cli==0.5.0
+ # via llama-index
+llama-index-core==0.13.2
+ # via llama-cloud-services
+ # via llama-index
+ # via llama-index-cli
+ # via llama-index-embeddings-huggingface
+ # via llama-index-embeddings-openai
+ # via llama-index-indices-managed-llama-cloud
+ # via llama-index-llms-ollama
+ # via llama-index-llms-openai
+ # via llama-index-readers-file
+ # via llama-index-readers-llama-parse
+ # via llama-index-vector-stores-qdrant
+llama-index-embeddings-huggingface==0.6.0
+ # via umapyai
+llama-index-embeddings-openai==0.5.0
+ # via llama-index
+ # via llama-index-cli
+llama-index-indices-managed-llama-cloud==0.9.1
+ # via llama-index
+llama-index-instrumentation==0.4.0
+ # via llama-index-workflows
+llama-index-llms-ollama==0.7.1
+ # via umapyai
+llama-index-llms-openai==0.5.4
+ # via llama-index
+ # via llama-index-cli
+llama-index-readers-file==0.5.1
+ # via llama-index
+llama-index-readers-llama-parse==0.5.0
+ # via llama-index
+llama-index-vector-stores-qdrant==0.7.1
+ # via umapyai
+llama-index-workflows==1.3.0
+ # via llama-index-core
+llama-parse==0.6.54
+ # via llama-index-readers-llama-parse
loguru==0.7.3
# via umapyai
marisa-trie==1.2.1
@@ -145,22 +249,38 @@ markupsafe==3.0.2
# via flask
# via jinja2
# via werkzeug
+marshmallow==3.26.1
+ # via dataclasses-json
mdurl==0.1.2
# via markdown-it-py
mmh3==5.1.0
# via chromadb
mpmath==1.3.0
# via sympy
+multidict==6.6.4
+ # via aiohttp
+ # via yarl
murmurhash==1.0.13
# via preshed
# via spacy
# via thinc
+mypy-extensions==1.1.0
+ # via typing-inspect
+nest-asyncio==1.6.0
+ # via llama-index-core
networkx==3.5
+ # via llama-index-core
# via torch
+nltk==3.9.1
+ # via llama-index
+ # via llama-index-core
numpy==2.3.2
# via blis
# via chromadb
+ # via llama-index-core
# via onnxruntime
+ # via pandas
+ # via qdrant-client
# via scikit-learn
# via scipy
# via spacy
@@ -170,9 +290,13 @@ oauthlib==3.3.1
# via kubernetes
# via requests-oauthlib
ollama==0.5.2
+ # via llama-index-llms-ollama
# via umapyai
onnxruntime==1.22.1
# via chromadb
+openai==1.99.9
+ # via llama-index-embeddings-openai
+ # via llama-index-llms-openai
opentelemetry-api==1.35.0
# via chromadb
# via opentelemetry-exporter-otlp-proto-grpc
@@ -197,24 +321,37 @@ overrides==7.7.0
packaging==25.0
# via build
# via huggingface-hub
+ # via marshmallow
# via onnxruntime
# via spacy
# via thinc
# via transformers
# via weasel
+pandas==2.2.3
+ # via llama-index-readers-file
pillow==11.3.0
+ # via llama-index-core
# via sentence-transformers
platformdirs==4.3.8
+ # via banks
+ # via llama-cloud-services
+ # via llama-index-core
# via yapf
+portalocker==3.2.0
+ # via qdrant-client
posthog==5.4.0
# via chromadb
preshed==3.0.10
# via spacy
# via thinc
+propcache==0.3.2
+ # via aiohttp
+ # via yarl
protobuf==6.31.1
# via googleapis-common-protos
# via onnxruntime
# via opentelemetry-proto
+ # via qdrant-client
psutil==7.0.0
# via umapyai
pyasn1==0.6.1
@@ -225,9 +362,17 @@ pyasn1-modules==0.4.2
pybase64==1.4.2
# via chromadb
pydantic==2.11.7
+ # via banks
# via chromadb
# via confection
+ # via llama-cloud
+ # via llama-cloud-services
+ # via llama-index-core
+ # via llama-index-instrumentation
+ # via llama-index-workflows
# via ollama
+ # via openai
+ # via qdrant-client
# via spacy
# via thinc
# via weasel
@@ -235,32 +380,46 @@ pydantic-core==2.33.2
# via pydantic
pygments==2.19.2
# via rich
+pypdf==5.9.0
+ # via llama-index-readers-file
pypika==0.48.9
# via chromadb
pyproject-hooks==1.2.0
# via build
python-dateutil==2.9.0.post0
# via kubernetes
+ # via pandas
# via posthog
python-dotenv==1.1.1
+ # via llama-cloud-services
# via uvicorn
+pytz==2025.2
+ # via pandas
pyyaml==6.0.2
# via chromadb
# via huggingface-hub
# via kubernetes
+ # via llama-index-core
# via transformers
# via uvicorn
+qdrant-client==1.15.1
+ # via llama-index-vector-stores-qdrant
+ # via umapyai
referencing==0.36.2
# via jsonschema
# via jsonschema-specifications
regex==2024.11.6
+ # via nltk
+ # via tiktoken
# via transformers
requests==2.32.4
# via huggingface-hub
# via kubernetes
+ # via llama-index-core
# via posthog
# via requests-oauthlib
# via spacy
+ # via tiktoken
# via transformers
# via umapyai
# via weasel
@@ -283,8 +442,10 @@ scipy==1.16.1
# via scikit-learn
# via sentence-transformers
sentence-transformers==5.0.0
+ # via llama-index-embeddings-huggingface
# via umapyai
setuptools==80.9.0
+ # via llama-index-core
# via marisa-trie
# via spacy
# via thinc
@@ -301,6 +462,7 @@ smart-open==7.3.0.post1
# via weasel
sniffio==1.3.1
# via anyio
+ # via openai
soupsieve==2.7
# via beautifulsoup4
spacy==3.8.7
@@ -309,41 +471,56 @@ spacy-legacy==3.0.12
# via spacy
spacy-loggers==1.0.5
# via spacy
+sqlalchemy==2.0.43
+ # via llama-index-core
srsly==2.5.1
# via confection
# via spacy
# via thinc
# via weasel
+striprtf==0.0.26
+ # via llama-index-readers-file
sympy==1.14.0
# via onnxruntime
# via torch
tenacity==9.1.2
# via chromadb
+ # via llama-cloud-services
+ # via llama-index-core
thinc==8.3.6
# via spacy
threadpoolctl==3.6.0
# via scikit-learn
+tiktoken==0.11.0
+ # via llama-index-core
tokenizers==0.21.2
# via chromadb
# via transformers
-torch==2.7.1
+torch==2.8.0
# via sentence-transformers
tqdm==4.67.1
# via chromadb
# via huggingface-hub
+ # via llama-index-core
+ # via nltk
+ # via openai
# via sentence-transformers
# via spacy
# via transformers
-transformers==4.54.0
+transformers==4.55.2
# via sentence-transformers
typer==0.16.0
# via chromadb
# via spacy
# via weasel
typing-extensions==4.14.1
+ # via aiosqlite
# via beautifulsoup4
# via chromadb
# via huggingface-hub
+ # via llama-index-core
+ # via llama-index-workflows
+ # via openai
# via opentelemetry-api
# via opentelemetry-exporter-otlp-proto-grpc
# via opentelemetry-sdk
@@ -351,13 +528,21 @@ typing-extensions==4.14.1
# via pydantic
# via pydantic-core
# via sentence-transformers
+ # via sqlalchemy
# via torch
# via typer
+ # via typing-inspect
# via typing-inspection
+typing-inspect==0.9.0
+ # via dataclasses-json
+ # via llama-index-core
typing-inspection==0.4.1
# via pydantic
+tzdata==2025.2
+ # via pandas
urllib3==2.5.0
# via kubernetes
+ # via qdrant-client
# via requests
uvicorn==0.35.0
# via chromadb
@@ -379,9 +564,13 @@ werkzeug==3.1.3
# via flask
# via flask-cors
wrapt==1.17.2
+ # via deprecated
+ # via llama-index-core
# via smart-open
wsproto==1.2.0
# via simple-websocket
yapf==0.43.0
+yarl==1.20.1
+ # via aiohttp
zipp==3.23.0
# via importlib-metadata
diff --git a/requirements.lock b/requirements.lock
index f0d2f08..80be97b 100644
--- a/requirements.lock
+++ b/requirements.lock
@@ -10,19 +10,33 @@
# universal: false
-e file:.
+aiohappyeyeballs==2.6.1
+ # via aiohttp
+aiohttp==3.12.15
+ # via huggingface-hub
+ # via llama-index-core
+aiosignal==1.4.0
+ # via aiohttp
+aiosqlite==0.21.0
+ # via llama-index-core
annotated-types==0.7.0
# via pydantic
anyio==4.9.0
# via httpx
+ # via openai
# via watchfiles
attrs==25.3.0
+ # via aiohttp
# via jsonschema
# via referencing
backoff==2.2.1
# via posthog
+banks==2.2.0
+ # via llama-index-core
bcrypt==4.3.0
# via chromadb
beautifulsoup4==4.13.4
+ # via llama-index-readers-file
# via umapyai
blinker==1.9.0
# via flask
@@ -40,6 +54,7 @@ certifi==2025.7.14
# via httpcore
# via httpx
# via kubernetes
+ # via llama-cloud
# via requests
charset-normalizer==3.4.2
# via requests
@@ -47,10 +62,14 @@ chromadb==1.0.15
# via umapyai
click==8.2.1
# via flask
+ # via llama-cloud-services
+ # via nltk
# via typer
# via uvicorn
cloudpathlib==0.21.1
# via weasel
+colorama==0.4.6
+ # via griffe
coloredlogs==15.0.1
# via onnxruntime
confection==0.1.5
@@ -60,7 +79,19 @@ cymem==2.0.11
# via preshed
# via spacy
# via thinc
+dataclasses-json==0.6.7
+ # via llama-index-core
+defusedxml==0.7.1
+ # via llama-index-readers-file
+deprecated==1.2.18
+ # via banks
+ # via llama-index-core
+ # via llama-index-indices-managed-llama-cloud
+ # via llama-index-instrumentation
+dirtyjson==1.0.8
+ # via llama-index-core
distro==1.9.0
+ # via openai
# via posthog
durationpy==0.10
# via kubernetes
@@ -70,6 +101,8 @@ filelock==3.18.0
# via huggingface-hub
# via torch
# via transformers
+filetype==1.2.0
+ # via llama-index-core
flask==3.1.1
# via flask-cors
# via flask-sock
@@ -80,39 +113,61 @@ flask-sock==0.7.0
# via umapyai
flatbuffers==25.2.10
# via onnxruntime
+frozenlist==1.7.0
+ # via aiohttp
+ # via aiosignal
fsspec==2025.7.0
# via huggingface-hub
+ # via llama-index-core
# via torch
google-auth==2.40.3
# via kubernetes
googleapis-common-protos==1.70.0
# via opentelemetry-exporter-otlp-proto-grpc
+greenlet==3.2.4
+ # via sqlalchemy
+griffe==1.12.1
+ # via banks
grpcio==1.74.0
# via chromadb
+ # via llama-index-vector-stores-qdrant
# via opentelemetry-exporter-otlp-proto-grpc
+ # via qdrant-client
h11==0.16.0
# via httpcore
# via uvicorn
# via wsproto
+h2==4.2.0
+ # via httpx
hf-xet==1.1.5
# via huggingface-hub
+hpack==4.1.0
+ # via h2
httpcore==1.0.9
# via httpx
httptools==0.6.4
# via uvicorn
httpx==0.28.1
# via chromadb
+ # via llama-cloud
+ # via llama-index-core
# via ollama
+ # via openai
+ # via qdrant-client
huggingface-hub==0.34.1
+ # via llama-index-embeddings-huggingface
# via sentence-transformers
# via tokenizers
# via transformers
humanfriendly==10.0
# via coloredlogs
+hyperframe==6.1.0
+ # via h2
idna==3.10
# via anyio
# via httpx
# via requests
+ # via yarl
importlib-metadata==8.7.0
# via opentelemetry-api
importlib-resources==6.5.2
@@ -120,10 +175,14 @@ importlib-resources==6.5.2
itsdangerous==2.2.0
# via flask
jinja2==3.1.6
+ # via banks
# via flask
# via spacy
# via torch
+jiter==0.10.0
+ # via openai
joblib==1.5.1
+ # via nltk
# via scikit-learn
jsonschema==4.25.0
# via chromadb
@@ -135,6 +194,51 @@ langcodes==3.5.0
# via spacy
language-data==1.3.0
# via langcodes
+llama-cloud==0.1.35
+ # via llama-cloud-services
+ # via llama-index-indices-managed-llama-cloud
+llama-cloud-services==0.6.54
+ # via llama-parse
+llama-index==0.13.2
+ # via umapyai
+llama-index-cli==0.5.0
+ # via llama-index
+llama-index-core==0.13.2
+ # via llama-cloud-services
+ # via llama-index
+ # via llama-index-cli
+ # via llama-index-embeddings-huggingface
+ # via llama-index-embeddings-openai
+ # via llama-index-indices-managed-llama-cloud
+ # via llama-index-llms-ollama
+ # via llama-index-llms-openai
+ # via llama-index-readers-file
+ # via llama-index-readers-llama-parse
+ # via llama-index-vector-stores-qdrant
+llama-index-embeddings-huggingface==0.6.0
+ # via umapyai
+llama-index-embeddings-openai==0.5.0
+ # via llama-index
+ # via llama-index-cli
+llama-index-indices-managed-llama-cloud==0.9.1
+ # via llama-index
+llama-index-instrumentation==0.4.0
+ # via llama-index-workflows
+llama-index-llms-ollama==0.7.1
+ # via umapyai
+llama-index-llms-openai==0.5.4
+ # via llama-index
+ # via llama-index-cli
+llama-index-readers-file==0.5.1
+ # via llama-index
+llama-index-readers-llama-parse==0.5.0
+ # via llama-index
+llama-index-vector-stores-qdrant==0.7.1
+ # via umapyai
+llama-index-workflows==1.3.0
+ # via llama-index-core
+llama-parse==0.6.54
+ # via llama-index-readers-llama-parse
loguru==0.7.3
# via umapyai
marisa-trie==1.2.1
@@ -145,22 +249,38 @@ markupsafe==3.0.2
# via flask
# via jinja2
# via werkzeug
+marshmallow==3.26.1
+ # via dataclasses-json
mdurl==0.1.2
# via markdown-it-py
mmh3==5.1.0
# via chromadb
mpmath==1.3.0
# via sympy
+multidict==6.6.4
+ # via aiohttp
+ # via yarl
murmurhash==1.0.13
# via preshed
# via spacy
# via thinc
+mypy-extensions==1.1.0
+ # via typing-inspect
+nest-asyncio==1.6.0
+ # via llama-index-core
networkx==3.5
+ # via llama-index-core
# via torch
+nltk==3.9.1
+ # via llama-index
+ # via llama-index-core
numpy==2.3.2
# via blis
# via chromadb
+ # via llama-index-core
# via onnxruntime
+ # via pandas
+ # via qdrant-client
# via scikit-learn
# via scipy
# via spacy
@@ -170,9 +290,13 @@ oauthlib==3.3.1
# via kubernetes
# via requests-oauthlib
ollama==0.5.2
+ # via llama-index-llms-ollama
# via umapyai
onnxruntime==1.22.1
# via chromadb
+openai==1.99.9
+ # via llama-index-embeddings-openai
+ # via llama-index-llms-openai
opentelemetry-api==1.35.0
# via chromadb
# via opentelemetry-exporter-otlp-proto-grpc
@@ -197,22 +321,36 @@ overrides==7.7.0
packaging==25.0
# via build
# via huggingface-hub
+ # via marshmallow
# via onnxruntime
# via spacy
# via thinc
# via transformers
# via weasel
+pandas==2.2.3
+ # via llama-index-readers-file
pillow==11.3.0
+ # via llama-index-core
# via sentence-transformers
+platformdirs==4.3.8
+ # via banks
+ # via llama-cloud-services
+ # via llama-index-core
+portalocker==3.2.0
+ # via qdrant-client
posthog==5.4.0
# via chromadb
preshed==3.0.10
# via spacy
# via thinc
+propcache==0.3.2
+ # via aiohttp
+ # via yarl
protobuf==6.31.1
# via googleapis-common-protos
# via onnxruntime
# via opentelemetry-proto
+ # via qdrant-client
psutil==7.0.0
# via umapyai
pyasn1==0.6.1
@@ -223,9 +361,17 @@ pyasn1-modules==0.4.2
pybase64==1.4.2
# via chromadb
pydantic==2.11.7
+ # via banks
# via chromadb
# via confection
+ # via llama-cloud
+ # via llama-cloud-services
+ # via llama-index-core
+ # via llama-index-instrumentation
+ # via llama-index-workflows
# via ollama
+ # via openai
+ # via qdrant-client
# via spacy
# via thinc
# via weasel
@@ -233,32 +379,46 @@ pydantic-core==2.33.2
# via pydantic
pygments==2.19.2
# via rich
+pypdf==5.9.0
+ # via llama-index-readers-file
pypika==0.48.9
# via chromadb
pyproject-hooks==1.2.0
# via build
python-dateutil==2.9.0.post0
# via kubernetes
+ # via pandas
# via posthog
python-dotenv==1.1.1
+ # via llama-cloud-services
# via uvicorn
+pytz==2025.2
+ # via pandas
pyyaml==6.0.2
# via chromadb
# via huggingface-hub
# via kubernetes
+ # via llama-index-core
# via transformers
# via uvicorn
+qdrant-client==1.15.1
+ # via llama-index-vector-stores-qdrant
+ # via umapyai
referencing==0.36.2
# via jsonschema
# via jsonschema-specifications
regex==2024.11.6
+ # via nltk
+ # via tiktoken
# via transformers
requests==2.32.4
# via huggingface-hub
# via kubernetes
+ # via llama-index-core
# via posthog
# via requests-oauthlib
# via spacy
+ # via tiktoken
# via transformers
# via umapyai
# via weasel
@@ -280,8 +440,10 @@ scipy==1.16.1
# via scikit-learn
# via sentence-transformers
sentence-transformers==5.0.0
+ # via llama-index-embeddings-huggingface
# via umapyai
setuptools==80.9.0
+ # via llama-index-core
# via marisa-trie
# via spacy
# via thinc
@@ -298,6 +460,7 @@ smart-open==7.3.0.post1
# via weasel
sniffio==1.3.1
# via anyio
+ # via openai
soupsieve==2.7
# via beautifulsoup4
spacy==3.8.7
@@ -306,41 +469,56 @@ spacy-legacy==3.0.12
# via spacy
spacy-loggers==1.0.5
# via spacy
+sqlalchemy==2.0.43
+ # via llama-index-core
srsly==2.5.1
# via confection
# via spacy
# via thinc
# via weasel
+striprtf==0.0.26
+ # via llama-index-readers-file
sympy==1.14.0
# via onnxruntime
# via torch
tenacity==9.1.2
# via chromadb
+ # via llama-cloud-services
+ # via llama-index-core
thinc==8.3.6
# via spacy
threadpoolctl==3.6.0
# via scikit-learn
+tiktoken==0.11.0
+ # via llama-index-core
tokenizers==0.21.2
# via chromadb
# via transformers
-torch==2.7.1
+torch==2.8.0
# via sentence-transformers
tqdm==4.67.1
# via chromadb
# via huggingface-hub
+ # via llama-index-core
+ # via nltk
+ # via openai
# via sentence-transformers
# via spacy
# via transformers
-transformers==4.54.0
+transformers==4.55.2
# via sentence-transformers
typer==0.16.0
# via chromadb
# via spacy
# via weasel
typing-extensions==4.14.1
+ # via aiosqlite
# via beautifulsoup4
# via chromadb
# via huggingface-hub
+ # via llama-index-core
+ # via llama-index-workflows
+ # via openai
# via opentelemetry-api
# via opentelemetry-exporter-otlp-proto-grpc
# via opentelemetry-sdk
@@ -348,13 +526,21 @@ typing-extensions==4.14.1
# via pydantic
# via pydantic-core
# via sentence-transformers
+ # via sqlalchemy
# via torch
# via typer
+ # via typing-inspect
# via typing-inspection
+typing-inspect==0.9.0
+ # via dataclasses-json
+ # via llama-index-core
typing-inspection==0.4.1
# via pydantic
+tzdata==2025.2
+ # via pandas
urllib3==2.5.0
# via kubernetes
+ # via qdrant-client
# via requests
uvicorn==0.35.0
# via chromadb
@@ -376,8 +562,12 @@ werkzeug==3.1.3
# via flask
# via flask-cors
wrapt==1.17.2
+ # via deprecated
+ # via llama-index-core
# via smart-open
wsproto==1.2.0
# via simple-websocket
+yarl==1.20.1
+ # via aiohttp
zipp==3.23.0
# via importlib-metadata
diff --git a/src/umapyai_alternative/__init__.py b/src/umapyai_alternative/__init__.py
new file mode 100644
index 0000000..9e613a1
--- /dev/null
+++ b/src/umapyai_alternative/__init__.py
@@ -0,0 +1,96 @@
+import qdrant_client
+from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
+from llama_index.core.storage import StorageContext
+from llama_index.llms.ollama import Ollama
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+from llama_index.vector_stores.qdrant import QdrantVectorStore
+import logging
+import threading
+import sys
+import time
+
+
+def _wait_animation(stop_event: threading.Event, interval: float = 0.35):
+ frames = ["", ".", "..", "..."]
+ max_length = max(len(frame) for frame in frames)
+ index = 0
+
+ while not stop_event.is_set():
+ frame = frames[index % len(frames)]
+
+ sys.stdout.write("\r" + frame + " " * (max_length - len(frame)))
+
+ index += 1
+
+ time.sleep(interval)
+
+ sys.stdout.write("\r" + " " * max_length + "\r")
+ sys.stdout.flush()
+
+
+def main():
+ logging.basicConfig(level=logging.WARNING)
+
+ documents = SimpleDirectoryReader("./uma_articles_clean").load_data()
+ client = qdrant_client.QdrantClient(path="./qdrant_data")
+ vector_store = QdrantVectorStore(client=client, collection_name="umamusume")
+ storage_context = StorageContext.from_defaults(vector_store=vector_store)
+ llm = Ollama(model="gpt-oss:20b", request_timeout=120.0)
+ embedding_model = HuggingFaceEmbedding(
+ model_name="sentence-transformers/all-MiniLM-L6-v2")
+
+ Settings.llm = llm
+ Settings.embed_model = embedding_model
+
+ index = VectorStoreIndex.from_documents(
+ documents,
+ storage_context=storage_context,
+ )
+ query_engine = index.as_query_engine(streaming=True)
+
+ try:
+ while True:
+ try:
+ user_query = input("> ").strip()
+ except EOFError:
+ print()
+
+ break
+
+ if not user_query:
+ continue
+
+ if user_query.lower() in {"quit", "exit", "q"}:
+ break
+
+ response_stream = query_engine.query(user_query)
+ stop_event = threading.Event()
+
+ print()
+
+ animation_thread = threading.Thread(
+ target=_wait_animation, args=(stop_event,), daemon=True)
+
+ animation_thread.start()
+
+ got_first_token = False
+
+ try:
+ for token in response_stream.response_gen:
+ if not got_first_token:
+ stop_event.set()
+ animation_thread.join()
+
+ got_first_token = True
+
+ print(token, end="", flush=True)
+ finally:
+ if not got_first_token:
+ stop_event.set()
+ animation_thread.join()
+
+ print("\n")
+ except KeyboardInterrupt:
+ print()
+
+ print()
diff --git a/src/umapyai_alternative/__main__.py b/src/umapyai_alternative/__main__.py
new file mode 100644
index 0000000..1680fe8
--- /dev/null
+++ b/src/umapyai_alternative/__main__.py
@@ -0,0 +1,4 @@
+import umapyai_alternative
+import sys
+
+sys.exit(umapyai_alternative.main())