1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
|
import type { EmbeddingProvider } from "./embedding-provider.js";
/**
 * Identifiers of the models that the local embedding provider accepts.
 *
 * Each member is the exact string handed to the feature-extraction pipeline
 * when the model is loaded.
 */
export type LocalEmbeddingModel =
  | "Xenova/all-MiniLM-L6-v2"
  | "Xenova/bge-small-en-v1.5"
  | "Xenova/bge-base-en-v1.5";
/**
 * Options accepted by the `LocalEmbeddingProvider` constructor.
 */
export type LocalEmbeddingProviderConfiguration = {
  /** Model to load; when omitted the provider picks its own default. */
  model?: LocalEmbeddingModel;
};
/**
 * Per-model value used to populate `LocalEmbeddingProvider.dimensions`.
 *
 * Typed as a `Record` over `LocalEmbeddingModel`, so adding a model to the
 * union without adding an entry here is a compile error.
 */
const MODEL_DIMENSIONS: Record<LocalEmbeddingModel, number> = {
  "Xenova/all-MiniLM-L6-v2": 384,
  "Xenova/bge-small-en-v1.5": 384,
  "Xenova/bge-base-en-v1.5": 768,
};
/**
 * Minimal structural view of the feature-extraction pipeline as this module
 * uses it: a callable taking a batch of texts plus pooling options and
 * resolving to an object whose `tolist()` yields one numeric vector per text.
 */
type Pipeline = (
  texts: string[],
  options: { pooling: string; normalize: boolean },
) => Promise<{ tolist: () => number[][] }>;
/**
 * Embedding provider backed by a feature-extraction pipeline from
 * `@xenova/transformers`.
 *
 * The pipeline is created lazily on the first embedding request and then
 * shared by all later requests on the same instance. Embeddings are produced
 * with mean pooling and normalization enabled.
 */
export class LocalEmbeddingProvider implements EmbeddingProvider {
  /** Model identifier passed to the pipeline factory. */
  private readonly model: LocalEmbeddingModel;

  /** In-flight or completed pipeline load; `null` until first use or after a failed load. */
  private pipelinePromise: Promise<Pipeline> | null = null;

  /** Value looked up from `MODEL_DIMENSIONS` for the selected model. */
  readonly dimensions: number;

  /**
   * @param configuration Optional settings; `model` defaults to
   *   `"Xenova/all-MiniLM-L6-v2"` when not provided.
   */
  constructor(configuration: LocalEmbeddingProviderConfiguration = {}) {
    this.model = configuration.model ?? "Xenova/all-MiniLM-L6-v2";
    this.dimensions = MODEL_DIMENSIONS[this.model];
  }

  /**
   * Returns the shared pipeline, starting the load on first call.
   *
   * Concurrent callers receive the same promise, so the load is started only
   * once. If the load rejects, the cached promise is cleared so that a later
   * call can retry instead of replaying the same rejection forever.
   *
   * @throws Whatever the dynamic import or the pipeline factory rejects with.
   */
  private getPipeline(): Promise<Pipeline> {
    if (!this.pipelinePromise) {
      this.pipelinePromise = (async () => {
        try {
          const { pipeline } = await import("@xenova/transformers");
          return (await pipeline("feature-extraction", this.model)) as Pipeline;
        } catch (error) {
          // Runs only after an `await`, i.e. after the assignment above has
          // completed, so this reliably resets the cache for the next caller.
          this.pipelinePromise = null;
          throw error;
        }
      })();
    }
    return this.pipelinePromise;
  }

  /**
   * Shared implementation for `generate` and `generateBatch`.
   *
   * An empty batch resolves to `[]` without loading the pipeline.
   */
  private async embed(texts: string[]): Promise<number[][]> {
    if (texts.length === 0) {
      return [];
    }
    const pipeline = await this.getPipeline();
    const output = await pipeline(texts, {
      pooling: "mean",
      normalize: true,
    });
    return output.tolist();
  }

  /**
   * Embeds a single text.
   *
   * @param text Text to embed.
   * @returns The first vector produced by the pipeline, or an empty array if
   *   the pipeline produced no rows.
   */
  async generate(text: string): Promise<number[]> {
    const [embedding] = await this.embed([text]);
    return embedding ?? [];
  }

  /**
   * Embeds several texts in one pipeline call.
   *
   * @param texts Texts to embed; an empty array yields an empty result.
   * @returns The vectors returned by the pipeline's `tolist()`.
   */
  async generateBatch(texts: string[]): Promise<number[][]> {
    return this.embed(texts);
  }
}
|