From 88bc8b64bb0bca36f3bdf51f51387debd4137d1f Mon Sep 17 00:00:00 2001 From: Fuwn Date: Tue, 3 Feb 2026 21:19:35 -0800 Subject: feat(sdk): Add local embedding support with transformers.js --- packages/sdk/package.json | 1 + packages/sdk/src/embedding-provider.ts | 5 +++ packages/sdk/src/embedding-service.ts | 45 ++++++++++--------- packages/sdk/src/index.ts | 12 +++++ packages/sdk/src/local-embedding-provider.ts | 64 +++++++++++++++++++++++++++ packages/sdk/src/openai-embedding-provider.ts | 46 +++++++++++++++++++ 6 files changed, 152 insertions(+), 21 deletions(-) create mode 100644 packages/sdk/src/embedding-provider.ts create mode 100644 packages/sdk/src/local-embedding-provider.ts create mode 100644 packages/sdk/src/openai-embedding-provider.ts (limited to 'packages') diff --git a/packages/sdk/package.json b/packages/sdk/package.json index 21e7e26..e3dac68 100644 --- a/packages/sdk/package.json +++ b/packages/sdk/package.json @@ -28,6 +28,7 @@ }, "dependencies": { "@supabase/supabase-js": "^2.94.0", + "@xenova/transformers": "^2.17.2", "openai": "^6.17.0" } } diff --git a/packages/sdk/src/embedding-provider.ts b/packages/sdk/src/embedding-provider.ts new file mode 100644 index 0000000..2f6e9a4 --- /dev/null +++ b/packages/sdk/src/embedding-provider.ts @@ -0,0 +1,5 @@ +export type EmbeddingProvider = { + generate(text: string): Promise; + generateBatch(texts: string[]): Promise; + readonly dimensions: number; +}; diff --git a/packages/sdk/src/embedding-service.ts b/packages/sdk/src/embedding-service.ts index 9d3367f..274bdee 100644 --- a/packages/sdk/src/embedding-service.ts +++ b/packages/sdk/src/embedding-service.ts @@ -1,38 +1,41 @@ -import OpenAI from "openai"; +import type { EmbeddingProvider } from "./embedding-provider.js"; +import { + OpenAIEmbeddingProvider, + type OpenAIEmbeddingModel, +} from "./openai-embedding-provider.js"; -export type EmbeddingModel = - | "text-embedding-3-small" - | "text-embedding-3-large"; +export type EmbeddingModel = OpenAIEmbeddingModel; export type EmbeddingServiceConfiguration = { apiKey: string; model?: EmbeddingModel; }; -export class EmbeddingService { - private client: OpenAI; - private model: EmbeddingModel; +export class EmbeddingService implements EmbeddingProvider { + private provider: EmbeddingProvider; constructor(configuration: EmbeddingServiceConfiguration) { - this.client = new OpenAI({ apiKey: configuration.apiKey }); - this.model = configuration.model ?? "text-embedding-3-small"; + this.provider = new OpenAIEmbeddingProvider({ + apiKey: configuration.apiKey, + model: configuration.model, + }); } - async generate(text: string): Promise { - const response = await this.client.embeddings.create({ - model: this.model, - input: text, - }); + get dimensions(): number { + return this.provider.dimensions; + } - return response.data[0]?.embedding ?? []; + async generate(text: string): Promise { + return this.provider.generate(text); } async generateBatch(texts: string[]): Promise { - const response = await this.client.embeddings.create({ - model: this.model, - input: texts, - }); - - return response.data.map((item) => item.embedding); + return this.provider.generateBatch(texts); } } + +export function createEmbeddingService( + provider: EmbeddingProvider, +): EmbeddingProvider { + return provider; +} diff --git a/packages/sdk/src/index.ts b/packages/sdk/src/index.ts index e02192b..7c7bef2 100644 --- a/packages/sdk/src/index.ts +++ b/packages/sdk/src/index.ts @@ -28,6 +28,15 @@ export type { FolderCreateInput, FolderUpdateInput, } from "./project-store.js"; +export type { EmbeddingProvider } from "./embedding-provider.js"; +export type { + OpenAIEmbeddingModel, + OpenAIEmbeddingProviderConfiguration, +} from "./openai-embedding-provider.js"; +export type { + LocalEmbeddingModel, + LocalEmbeddingProviderConfiguration, +} from "./local-embedding-provider.js"; export { success, failure } from "./result.js"; export { InMemoryStore } from "./in-memory-store.js"; @@ -40,6 +49,9 @@ export { SupabaseStore } from "./supabase-store.js"; export { SupabaseProjectStore } from "./supabase-project-store.js"; export { EmbeddingService, + createEmbeddingService, type EmbeddingModel, type EmbeddingServiceConfiguration, } from "./embedding-service.js"; +export { OpenAIEmbeddingProvider } from "./openai-embedding-provider.js"; +export { LocalEmbeddingProvider } from "./local-embedding-provider.js"; diff --git a/packages/sdk/src/local-embedding-provider.ts b/packages/sdk/src/local-embedding-provider.ts new file mode 100644 index 0000000..47e5860 --- /dev/null +++ b/packages/sdk/src/local-embedding-provider.ts @@ -0,0 +1,64 @@ +import type { EmbeddingProvider } from "./embedding-provider.js"; + +export type LocalEmbeddingModel = + | "Xenova/all-MiniLM-L6-v2" + | "Xenova/bge-small-en-v1.5" + | "Xenova/bge-base-en-v1.5"; + +export type LocalEmbeddingProviderConfiguration = { + model?: LocalEmbeddingModel; +}; + +const MODEL_DIMENSIONS: Record = { + "Xenova/all-MiniLM-L6-v2": 384, + "Xenova/bge-small-en-v1.5": 384, + "Xenova/bge-base-en-v1.5": 768, +}; + +type Pipeline = ( + texts: string[], + options: { pooling: string; normalize: boolean }, +) => Promise<{ tolist: () => number[][] }>; + +export class LocalEmbeddingProvider implements EmbeddingProvider { + private model: LocalEmbeddingModel; + private pipelinePromise: Promise | null = null; + readonly dimensions: number; + + constructor(configuration: LocalEmbeddingProviderConfiguration = {}) { + this.model = configuration.model ?? "Xenova/all-MiniLM-L6-v2"; + this.dimensions = MODEL_DIMENSIONS[this.model]; + } + + private getPipeline(): Promise { + if (!this.pipelinePromise) { + this.pipelinePromise = (async () => { + const { pipeline } = await import("@xenova/transformers"); + + return (await pipeline("feature-extraction", this.model)) as Pipeline; + })(); + } + + return this.pipelinePromise; + } + + async generate(text: string): Promise { + const pipeline = await this.getPipeline(); + const output = await pipeline([text], { + pooling: "mean", + normalize: true, + }); + + return output.tolist()[0] ?? []; + } + + async generateBatch(texts: string[]): Promise { + const pipeline = await this.getPipeline(); + const output = await pipeline(texts, { + pooling: "mean", + normalize: true, + }); + + return output.tolist(); + } +} diff --git a/packages/sdk/src/openai-embedding-provider.ts b/packages/sdk/src/openai-embedding-provider.ts new file mode 100644 index 0000000..fb96155 --- /dev/null +++ b/packages/sdk/src/openai-embedding-provider.ts @@ -0,0 +1,46 @@ +import OpenAI from "openai"; +import type { EmbeddingProvider } from "./embedding-provider.js"; + +export type OpenAIEmbeddingModel = + | "text-embedding-3-small" + | "text-embedding-3-large"; + +export type OpenAIEmbeddingProviderConfiguration = { + apiKey: string; + model?: OpenAIEmbeddingModel; +}; + +const MODEL_DIMENSIONS: Record = { + "text-embedding-3-small": 1536, + "text-embedding-3-large": 3072, +}; + +export class OpenAIEmbeddingProvider implements EmbeddingProvider { + private client: OpenAI; + private model: OpenAIEmbeddingModel; + readonly dimensions: number; + + constructor(configuration: OpenAIEmbeddingProviderConfiguration) { + this.client = new OpenAI({ apiKey: configuration.apiKey }); + this.model = configuration.model ?? "text-embedding-3-small"; + this.dimensions = MODEL_DIMENSIONS[this.model]; + } + + async generate(text: string): Promise { + const response = await this.client.embeddings.create({ + model: this.model, + input: text, + }); + + return response.data[0]?.embedding ?? []; + } + + async generateBatch(texts: string[]): Promise { + const response = await this.client.embeddings.create({ + model: this.model, + input: texts, + }); + + return response.data.map((item) => item.embedding); + } +} -- cgit v1.2.3