about | summary | refs | log | tree | commit | diff
path: root/packages/sdk
diff options
context:
space:
mode:
author: Fuwn <[email protected]> 2026-02-03 21:19:35 -0800
committer: Fuwn <[email protected]> 2026-02-03 21:21:28 -0800
commit: 88bc8b64bb0bca36f3bdf51f51387debd4137d1f (patch)
tree: ac4cd5ddcc2f7ac28e99fdaf5f1019f68df8c8f4 /packages/sdk
parent: style(typescript-config): Format package.json (diff)
download: archived-imemio-88bc8b64bb0bca36f3bdf51f51387debd4137d1f.tar.xz
archived-imemio-88bc8b64bb0bca36f3bdf51f51387debd4137d1f.zip
feat(sdk): Add local embedding support with transformers.js
Diffstat (limited to 'packages/sdk')
-rw-r--r-- packages/sdk/package.json 1
-rw-r--r-- packages/sdk/src/embedding-provider.ts 5
-rw-r--r-- packages/sdk/src/embedding-service.ts 45
-rw-r--r-- packages/sdk/src/index.ts 12
-rw-r--r-- packages/sdk/src/local-embedding-provider.ts 64
-rw-r--r-- packages/sdk/src/openai-embedding-provider.ts 46
6 files changed, 152 insertions, 21 deletions
diff --git a/packages/sdk/package.json b/packages/sdk/package.json
index 21e7e26..e3dac68 100644
--- a/packages/sdk/package.json
+++ b/packages/sdk/package.json
@@ -28,6 +28,7 @@
},
"dependencies": {
"@supabase/supabase-js": "^2.94.0",
+ "@xenova/transformers": "^2.17.2",
"openai": "^6.17.0"
}
}
diff --git a/packages/sdk/src/embedding-provider.ts b/packages/sdk/src/embedding-provider.ts
new file mode 100644
index 0000000..2f6e9a4
--- /dev/null
+++ b/packages/sdk/src/embedding-provider.ts
@@ -0,0 +1,5 @@
+export type EmbeddingProvider = { // Contract implemented by the embedding backends added in this change (OpenAI-backed and local).
+ generate(text: string): Promise<number[]>; // Resolves to the embedding vector for a single text.
+ generateBatch(texts: string[]): Promise<number[][]>; // Resolves to a list of embedding vectors for a list of texts.
+ readonly dimensions: number; // Vector length reported by the provider; the implementations here look it up per model.
+};
diff --git a/packages/sdk/src/embedding-service.ts b/packages/sdk/src/embedding-service.ts
index 9d3367f..274bdee 100644
--- a/packages/sdk/src/embedding-service.ts
+++ b/packages/sdk/src/embedding-service.ts
@@ -1,38 +1,41 @@
-import OpenAI from "openai";
+import type { EmbeddingProvider } from "./embedding-provider.js";
+import {
+ OpenAIEmbeddingProvider,
+ type OpenAIEmbeddingModel,
+} from "./openai-embedding-provider.js";
-export type EmbeddingModel =
- | "text-embedding-3-small"
- | "text-embedding-3-large";
+export type EmbeddingModel = OpenAIEmbeddingModel; // Alias: the previously inline union of model names now lives in openai-embedding-provider.
export type EmbeddingServiceConfiguration = {
apiKey: string;
model?: EmbeddingModel;
};
-export class EmbeddingService {
- private client: OpenAI;
- private model: EmbeddingModel;
+export class EmbeddingService implements EmbeddingProvider { // Wrapper that forwards every EmbeddingProvider member to an internally built OpenAIEmbeddingProvider.
+ private provider: EmbeddingProvider; // Delegate assigned in the constructor.
constructor(configuration: EmbeddingServiceConfiguration) {
- this.client = new OpenAI({ apiKey: configuration.apiKey });
- this.model = configuration.model ?? "text-embedding-3-small";
+ this.provider = new OpenAIEmbeddingProvider({ // Always constructs the OpenAI-backed provider; the default model is now chosen there.
+ apiKey: configuration.apiKey,
+ model: configuration.model, // Passed through as-is, including undefined.
+ });
}
- async generate(text: string): Promise<number[]> {
- const response = await this.client.embeddings.create({
- model: this.model,
- input: text,
- });
+ get dimensions(): number { // Mirrors the delegate's dimensions value.
+ return this.provider.dimensions;
+ }
- return response.data[0]?.embedding ?? [];
+ async generate(text: string): Promise<number[]> { // Forwards to the delegate's generate.
+ return this.provider.generate(text);
}
async generateBatch(texts: string[]): Promise<number[][]> {
- const response = await this.client.embeddings.create({
- model: this.model,
- input: texts,
- });
-
- return response.data.map((item) => item.embedding);
+ return this.provider.generateBatch(texts); // Forwards to the delegate's generateBatch.
}
}
+
+export function createEmbeddingService( // Factory-style entry point that accepts any EmbeddingProvider.
+ provider: EmbeddingProvider, // Provider instance supplied by the caller.
+): EmbeddingProvider {
+ return provider; // Returned unchanged: no wrapping, copying or validation happens here.
+}
diff --git a/packages/sdk/src/index.ts b/packages/sdk/src/index.ts
index e02192b..7c7bef2 100644
--- a/packages/sdk/src/index.ts
+++ b/packages/sdk/src/index.ts
@@ -28,6 +28,15 @@ export type {
FolderCreateInput,
FolderUpdateInput,
} from "./project-store.js";
+export type { EmbeddingProvider } from "./embedding-provider.js";
+export type {
+ OpenAIEmbeddingModel,
+ OpenAIEmbeddingProviderConfiguration,
+} from "./openai-embedding-provider.js";
+export type {
+ LocalEmbeddingModel,
+ LocalEmbeddingProviderConfiguration,
+} from "./local-embedding-provider.js";
export { success, failure } from "./result.js";
export { InMemoryStore } from "./in-memory-store.js";
@@ -40,6 +49,9 @@ export { SupabaseStore } from "./supabase-store.js";
export { SupabaseProjectStore } from "./supabase-project-store.js";
export {
EmbeddingService,
+ createEmbeddingService,
type EmbeddingModel,
type EmbeddingServiceConfiguration,
} from "./embedding-service.js";
+export { OpenAIEmbeddingProvider } from "./openai-embedding-provider.js";
+export { LocalEmbeddingProvider } from "./local-embedding-provider.js";
diff --git a/packages/sdk/src/local-embedding-provider.ts b/packages/sdk/src/local-embedding-provider.ts
new file mode 100644
index 0000000..47e5860
--- /dev/null
+++ b/packages/sdk/src/local-embedding-provider.ts
@@ -0,0 +1,64 @@
+import type { EmbeddingProvider } from "./embedding-provider.js";
+
+export type LocalEmbeddingModel = // Model identifiers accepted by LocalEmbeddingProvider; each has an entry in MODEL_DIMENSIONS.
+ | "Xenova/all-MiniLM-L6-v2"
+ | "Xenova/bge-small-en-v1.5"
+ | "Xenova/bge-base-en-v1.5";
+
+export type LocalEmbeddingProviderConfiguration = { // Options object for the LocalEmbeddingProvider constructor.
+ model?: LocalEmbeddingModel; // Optional; the constructor falls back to "Xenova/all-MiniLM-L6-v2" when omitted.
+};
+
+const MODEL_DIMENSIONS: Record<LocalEmbeddingModel, number> = { // Lookup for the dimensions value the provider exposes per model; Record typing requires an entry for every model.
+ "Xenova/all-MiniLM-L6-v2": 384,
+ "Xenova/bge-small-en-v1.5": 384,
+ "Xenova/bge-base-en-v1.5": 768,
+};
+
+type Pipeline = ( // Minimal call shape this file assumes for the loaded pipeline; the library's return value is cast to it.
+ texts: string[], // Batch of input texts.
+ options: { pooling: string; normalize: boolean }, // Options forwarded on every call by generate/generateBatch.
+) => Promise<{ tolist: () => number[][] }>; // Only tolist() is used, to read the result as nested number arrays.
+
+export class LocalEmbeddingProvider implements EmbeddingProvider { // EmbeddingProvider backed by a "feature-extraction" pipeline from @xenova/transformers, created on first use.
+ private model: LocalEmbeddingModel; // Model identifier passed to the pipeline factory.
+ private pipelinePromise: Promise<Pipeline> | null = null; // Cached so the pipeline is requested at most once per instance.
+ readonly dimensions: number; // Taken from MODEL_DIMENSIONS for the configured model.
+
+ constructor(configuration: LocalEmbeddingProviderConfiguration = {}) { // Configuration is optional and defaults to {}.
+ this.model = configuration.model ?? "Xenova/all-MiniLM-L6-v2"; // Default model when none is configured.
+ this.dimensions = MODEL_DIMENSIONS[this.model];
+ }
+
+ private getPipeline(): Promise<Pipeline> { // Returns the cached pipeline promise, creating it on the first call.
+ if (!this.pipelinePromise) { // NOTE(review): a rejected promise stays cached, so a failed load is never retried — confirm this is intended.
+ this.pipelinePromise = (async () => {
+ const { pipeline } = await import("@xenova/transformers"); // Dynamic import: the library is only loaded when an embedding is first requested.
+
+ return (await pipeline("feature-extraction", this.model)) as Pipeline; // Cast narrows the library's return type to the local Pipeline shape.
+ })();
+ }
+
+ return this.pipelinePromise;
+ }
+
+ async generate(text: string): Promise<number[]> { // Embeds one text by sending it as a single-element batch.
+ const pipeline = await this.getPipeline();
+ const output = await pipeline([text], {
+ pooling: "mean",
+ normalize: true,
+ });
+
+ return output.tolist()[0] ?? []; // First row of the result, or [] when the result has no first row.
+ }
+
+ async generateBatch(texts: string[]): Promise<number[][]> { // Embeds all texts in a single pipeline call.
+ const pipeline = await this.getPipeline();
+ const output = await pipeline(texts, { // Same options as generate.
+ pooling: "mean",
+ normalize: true,
+ });
+
+ return output.tolist(); // Rows exactly as tolist() returns them.
+ }
+}
diff --git a/packages/sdk/src/openai-embedding-provider.ts b/packages/sdk/src/openai-embedding-provider.ts
new file mode 100644
index 0000000..fb96155
--- /dev/null
+++ b/packages/sdk/src/openai-embedding-provider.ts
@@ -0,0 +1,46 @@
+import OpenAI from "openai";
+import type { EmbeddingProvider } from "./embedding-provider.js";
+
+export type OpenAIEmbeddingModel = // Model names accepted by OpenAIEmbeddingProvider; each has an entry in MODEL_DIMENSIONS.
+ | "text-embedding-3-small"
+ | "text-embedding-3-large";
+
+export type OpenAIEmbeddingProviderConfiguration = { // Options object for the OpenAIEmbeddingProvider constructor.
+ apiKey: string; // Required; handed to the OpenAI client constructor.
+ model?: OpenAIEmbeddingModel; // Optional; the constructor falls back to "text-embedding-3-small" when omitted.
+};
+
+const MODEL_DIMENSIONS: Record<OpenAIEmbeddingModel, number> = { // Lookup for the dimensions value the provider exposes per model; Record typing requires an entry for every model.
+ "text-embedding-3-small": 1536,
+ "text-embedding-3-large": 3072,
+};
+
+export class OpenAIEmbeddingProvider implements EmbeddingProvider { // EmbeddingProvider that calls embeddings.create on an OpenAI client.
+ private client: OpenAI; // Client built from the configured apiKey.
+ private model: OpenAIEmbeddingModel; // Model name sent with every request.
+ readonly dimensions: number; // Taken from MODEL_DIMENSIONS for the configured model; it is not sent with the request.
+
+ constructor(configuration: OpenAIEmbeddingProviderConfiguration) {
+ this.client = new OpenAI({ apiKey: configuration.apiKey });
+ this.model = configuration.model ?? "text-embedding-3-small"; // Default model when none is configured.
+ this.dimensions = MODEL_DIMENSIONS[this.model];
+ }
+
+ async generate(text: string): Promise<number[]> { // Requests an embedding for one text.
+ const response = await this.client.embeddings.create({
+ model: this.model,
+ input: text,
+ });
+
+ return response.data[0]?.embedding ?? []; // First embedding in the response, or [] when there is none.
+ }
+
+ async generateBatch(texts: string[]): Promise<number[][]> { // Requests embeddings for all texts in one API call.
+ const response = await this.client.embeddings.create({
+ model: this.model,
+ input: texts, // The whole array is sent as a single input.
+ });
+
+ return response.data.map((item) => item.embedding); // Embeddings in the order response.data lists them.
+ }
+}