aboutsummaryrefslogtreecommitdiff
path: root/apps/backend/src/utils/extractor.ts
blob: 8201cd282ae7bcb7045c39fe0ddf3e8537c4e271 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import { Env } from "../types";

/** Optional page metadata as returned by the `md.dhr.wtf/metadata` endpoint. */
export type PageMetadata = {
  title?: string;
  description?: string;
  image?: string;
  favicon?: string;
};

/** Value resolved by {@link extractPageContent}. */
export type ExtractedPageContent = PageMetadata & {
  contentToVectorize: string;
  contentToSave: string;
};

/**
 * Fetches the extracted text and the metadata of a page from the
 * `md.dhr.wtf` service.
 *
 * Two requests are made, in order: one for the page text and one for its
 * metadata. The same response text is returned as both `contentToSave` and
 * `contentToVectorize`.
 *
 * @param content - URL of the page to extract. It is percent-encoded before
 *   being placed in the service's `url` query parameter, so it may contain
 *   its own query string or fragment.
 * @param env - Accepted for interface compatibility; not read by this function.
 * @returns The page text plus whichever metadata fields the service provided.
 * @throws Error if either request returns a non-OK status, or if the metadata
 *   response body is not valid JSON.
 */
export const extractPageContent = async (
  content: string,
  env: Env
): Promise<ExtractedPageContent> => {
  console.log("content", content);

  // The "?nocache" suffix has always been sent as part of the `url` value, so
  // it is kept as-is (NOTE(review): confirm this is what the service expects).
  // The whole value is encoded so that `&`, `#`, `+` or `%` inside the page
  // URL reach the service intact instead of being parsed as part of the
  // service's own query string.
  const target = encodeURIComponent(`${content}?nocache`);

  const resp = await fetch(`https://md.dhr.wtf?url=${target}`);

  if (!resp.ok) {
    throw new Error(
      `Failed to fetch ${content}: ${resp.statusText}` + (await resp.text())
    );
  }

  // Read the body right away so it is not left unconsumed if the metadata
  // request below fails.
  const responseText = await resp.text();

  const metadataResp = await fetch(`https://md.dhr.wtf/metadata?url=${target}`);

  if (!metadataResp.ok) {
    throw new Error(
      `Failed to fetch metadata for ${content}: ${metadataResp.statusText}` +
        (await metadataResp.text())
    );
  }

  let parsed: unknown;
  try {
    // This is the only step that actually parses JSON, so it is the one that
    // must be guarded.
    parsed = await metadataResp.json();
  } catch (e) {
    throw new Error(`Failed to parse JSON from ${content}: ${e}`);
  }

  // A non-object payload (e.g. `null`) is treated as "no metadata" rather
  // than crashing on property access.
  const metadata: PageMetadata =
    typeof parsed === "object" && parsed !== null
      ? (parsed as PageMetadata)
      : {};

  return {
    contentToSave: responseText,
    contentToVectorize: responseText,
    title: metadata.title,
    description: metadata.description,
    image: metadata.image,
    favicon: metadata.favicon,
  };
};