aboutsummaryrefslogtreecommitdiff
path: root/packages/sdk/src/local-embedding-provider.ts
blob: 47e58602ba8730ce8d0b7d5eb426d24ce97d1ad9 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import type { EmbeddingProvider } from "./embedding-provider.js";

/**
 * Model identifiers accepted by `LocalEmbeddingProvider`.
 *
 * The chosen identifier is passed verbatim as the model argument when the
 * provider creates its "feature-extraction" pipeline.
 */
export type LocalEmbeddingModel =
	| "Xenova/all-MiniLM-L6-v2"
	| "Xenova/bge-small-en-v1.5"
	| "Xenova/bge-base-en-v1.5";

/** Options accepted by the `LocalEmbeddingProvider` constructor. */
export type LocalEmbeddingProviderConfiguration = {
	/**
	 * Model to load. When omitted, the provider's constructor falls back to
	 * "Xenova/all-MiniLM-L6-v2".
	 */
	model?: LocalEmbeddingModel;
};

/**
 * Dimension count recorded for each supported model.
 *
 * The provider's constructor looks up the configured model here to populate
 * its `dimensions` property. The `Record<LocalEmbeddingModel, number>`
 * annotation makes the compiler require an entry for every model in the union.
 */
const MODEL_DIMENSIONS: Record<LocalEmbeddingModel, number> = {
	"Xenova/all-MiniLM-L6-v2": 384,
	"Xenova/bge-small-en-v1.5": 384,
	"Xenova/bge-base-en-v1.5": 768,
};

/**
 * Minimal call signature this file relies on for the feature-extraction
 * pipeline: it takes a list of texts plus pooling/normalization options and
 * resolves to an object whose `tolist()` returns nested number arrays.
 *
 * The value returned by the library's `pipeline(...)` factory is cast to this
 * type, so the shape is not checked against the library's own typings.
 */
type Pipeline = (
	texts: string[],
	options: { pooling: string; normalize: boolean },
) => Promise<{ tolist: () => number[][] }>;

/**
 * Embedding provider backed by a `@xenova/transformers` "feature-extraction"
 * pipeline.
 *
 * The library is imported dynamically and the pipeline is created on first
 * use; the resulting promise is cached so later calls on the same instance
 * share it. A failed load is not cached, so the next call tries again.
 */
export class LocalEmbeddingProvider implements EmbeddingProvider {
	/** Model identifier handed to the pipeline factory. */
	private readonly model: LocalEmbeddingModel;
	/** In-flight or resolved pipeline; `null` until first use and after a failed load. */
	private pipelinePromise: Promise<Pipeline> | null = null;
	/** Dimension count looked up from `MODEL_DIMENSIONS` for the configured model. */
	readonly dimensions: number;

	/**
	 * @param configuration Optional settings; `model` defaults to
	 * "Xenova/all-MiniLM-L6-v2" when not provided.
	 */
	constructor(configuration: LocalEmbeddingProviderConfiguration = {}) {
		this.model = configuration.model ?? "Xenova/all-MiniLM-L6-v2";
		this.dimensions = MODEL_DIMENSIONS[this.model];
	}

	/** Imports the transformers library and builds the feature-extraction pipeline. */
	private async loadPipeline(): Promise<Pipeline> {
		const { pipeline } = await import("@xenova/transformers");

		return (await pipeline("feature-extraction", this.model)) as Pipeline;
	}

	/**
	 * Returns the cached pipeline promise, starting a load if none is cached.
	 *
	 * Concurrent callers share a single load. If that load rejects, the cache
	 * is cleared so a later call can retry instead of receiving the same
	 * rejection forever.
	 */
	private getPipeline(): Promise<Pipeline> {
		const cached = this.pipelinePromise;
		if (cached) {
			return cached;
		}

		const loading = this.loadPipeline();
		this.pipelinePromise = loading;
		// Side-channel handler: callers still observe the rejection through the
		// returned `loading` promise; this only forgets the failed attempt.
		loading.catch(() => {
			// Guard against clearing a newer attempt that replaced this one.
			if (this.pipelinePromise === loading) {
				this.pipelinePromise = null;
			}
		});

		return loading;
	}

	/** Runs the pipeline over `texts` with mean pooling and normalization enabled. */
	private async embed(texts: string[]): Promise<number[][]> {
		const pipeline = await this.getPipeline();
		const output = await pipeline(texts, {
			pooling: "mean",
			normalize: true,
		});

		return output.tolist();
	}

	/**
	 * Generates the embedding for a single text.
	 *
	 * @param text Text to embed.
	 * @returns The first row of the pipeline output, or an empty array if the
	 * pipeline produced no rows.
	 */
	async generate(text: string): Promise<number[]> {
		const rows = await this.embed([text]);

		return rows[0] ?? [];
	}

	/**
	 * Generates embeddings for several texts in one pipeline call.
	 *
	 * @param texts Texts to embed.
	 * @returns The pipeline output converted with `tolist()`; an empty input
	 * yields an empty array.
	 */
	async generateBatch(texts: string[]): Promise<number[][]> {
		// Nothing to embed: skip loading the model and avoid handing the
		// pipeline an empty array, which its tokenizer is not expected to accept.
		if (texts.length === 0) {
			return [];
		}

		return this.embed(texts);
	}
}