New Version of Supermemory Consumer App

author: Mahesh Sanikommmu <[email protected]> 2025-08-16 18:50:10 -0700
committer: Mahesh Sanikommmu <[email protected]> 2025-08-16 18:50:10 -0700
commit: 39003aff23d64ff1d96074d71521f6023c9bec01 (patch)
tree: 3f870c04b3dce315bba1b21aa2da158494e71774 /apps/backend/src/utils
parent: Merge pull request #355 from supermemoryai/archive (diff)
download: supermemory-39003aff23d64ff1d96074d71521f6023c9bec01.tar.xz
supermemory-39003aff23d64ff1d96074d71521f6023c9bec01.zip
8 files changed, 0 insertions, 863 deletions
diff --git a/apps/backend/src/utils/chunkers.ts b/apps/backend/src/utils/chunkers.ts
deleted file mode 100644
index ce345d29..00000000
--- a/apps/backend/src/utils/chunkers.ts
+++ /dev/null
@@ -1,116 +0,0 @@
-import nlp from "compromise";
-
-export default function chunkText(
-  text: string,
-  maxChunkSize: number,
-  overlap: number = 0.2
-): string[] {
-  // Pre-process text to remove excessive whitespace
-  text = text.replace(/\s+/g, " ").trim();
-
-  const sentences = nlp(text).sentences().out("array");
-  const chunks: {
-    text: string;
-    start: number;
-    end: number;
-    metadata?: {
-      position: string;
-      context?: string;
-    };
-  }[] = [];
-
-  let currentChunk: string[] = [];
-  let currentSize = 0;
-
-  for (let i = 0; i < sentences.length; i++) {
-    const sentence = sentences[i].trim();
-
-    // Skip empty sentences
-    if (!sentence) continue;
-
-    // If a single sentence is longer than maxChunkSize, split it
-    if (sentence.length > maxChunkSize) {
-      if (currentChunk.length > 0) {
-        chunks.push({
-          text: currentChunk.join(" "),
-          start: i - currentChunk.length,
-          end: i - 1,
-          metadata: {
-            position: `${i - currentChunk.length}-${i - 1}`,
-            context: currentChunk[0].substring(0, 100), // First 100 chars for context
-          },
-        });
-        currentChunk = [];
-        currentSize = 0;
-      }
-
-      // Split long sentence into smaller chunks
-      const words = sentence.split(" ");
-      let tempChunk: string[] = [];
-
-      for (const word of words) {
-        if (tempChunk.join(" ").length + word.length > maxChunkSize) {
-          chunks.push({
-            text: tempChunk.join(" "),
-            start: i,
-            end: i,
-            metadata: {
-              position: `${i}`,
-              context: "Split sentence",
-            },
-          });
-          tempChunk = [];
-        }
-        tempChunk.push(word);
-      }
-
-      if (tempChunk.length > 0) {
-        chunks.push({
-          text: tempChunk.join(" "),
-          start: i,
-          end: i,
-          metadata: {
-            position: `${i}`,
-            context: "Split sentence remainder",
-          },
-        });
-      }
-      continue;
-    }
-
-    currentChunk.push(sentence);
-    currentSize += sentence.length;
-
-    if (currentSize >= maxChunkSize) {
-      const overlapSize = Math.floor(currentChunk.length * overlap);
-      chunks.push({
-        text: currentChunk.join(" "),
-        start: i - currentChunk.length + 1,
-        end: i,
-        metadata: {
-          position: `${i - currentChunk.length + 1}-${i}`,
-          context: currentChunk[0].substring(0, 100),
-        },
-      });
-
-      // Keep overlap sentences for next chunk
-      currentChunk = currentChunk.slice(-overlapSize);
-      currentSize = currentChunk.reduce((sum, s) => sum + s.length, 0);
-    }
-  }
-
-  // Handle remaining sentences
-  if (currentChunk.length > 0) {
-    chunks.push({
-      text: currentChunk.join(" "),
-      start: sentences.length - currentChunk.length,
-      end: sentences.length - 1,
-      metadata: {
-        position: `${sentences.length - currentChunk.length}-${sentences.length - 1}`,
-        context: currentChunk[0].substring(0, 100),
-      },
-    });
-  }
-
-  return chunks.map((chunk) => chunk.text);
-}
diff --git a/apps/backend/src/utils/cipher.ts b/apps/backend/src/utils/cipher.ts
deleted file mode 100644
index 3ba2e905..00000000
--- a/apps/backend/src/utils/cipher.ts
+++ /dev/null
@@ -1,79 +0,0 @@
-async function encrypt(data: string, key: string): Promise<string> {
-  try {
-    const encoder = new TextEncoder();
-    const encodedData = encoder.encode(data);
-
-    const baseForIv = encoder.encode(data + key);
-    const ivHash = await crypto.subtle.digest('SHA-256', baseForIv);
-    const iv = new Uint8Array(ivHash).slice(0, 12);
-
-    const cryptoKey = await crypto.subtle.importKey(
-      "raw",
-      encoder.encode(key),
-      { name: "AES-GCM", length: 256 },
-      false,
-      ["encrypt", "decrypt"]
-    );
-
-    const encrypted = await crypto.subtle.encrypt(
-      { name: "AES-GCM", iv: new Uint8Array(iv).buffer as ArrayBuffer },
-      cryptoKey,
-      encodedData
-    );
-
-    const combined = new Uint8Array([...iv, ...new Uint8Array(encrypted)]);
-
-    // Convert to base64 safely
-    const base64 = Buffer.from(combined).toString("base64");
-
-    // Make URL-safe
-    return base64.replace(/\+/g, "-").replace(/\//g, "_").replace(/=+$/, "");
-  } catch (err) {
-    console.error("Encryption error:", err);
-    throw err;
-  }
-}
-
-async function decrypt(encryptedData: string, key: string): Promise<string> {
-  try {
-    // Restore base64 padding and convert URL-safe chars
-    const base64 = encryptedData
-      .replace(/-/g, "+")
-      .replace(/_/g, "/")
-      .padEnd(
-        encryptedData.length + ((4 - (encryptedData.length % 4)) % 4),
-        "="
-      );
-
-    // Use Buffer for safer base64 decoding
-    const combined = Buffer.from(base64, "base64");
-    const combinedArray = new Uint8Array(combined);
-
-    // Extract the IV that was used for encryption
-    const iv = combinedArray.slice(0, 12);
-    const encrypted = combinedArray.slice(12);
-
-    // Import the same key used for encryption
-    const cryptoKey = await crypto.subtle.importKey(
-      "raw",
-      new TextEncoder().encode(key),
-      { name: "AES-GCM", length: 256 },
-      false,
-      ["encrypt", "decrypt"]
-    );
-
-    // Use the extracted IV and key to decrypt
-    const decrypted = await crypto.subtle.decrypt(
-      { name: "AES-GCM", iv: new Uint8Array(iv).buffer as ArrayBuffer },
-      cryptoKey,
-      encrypted.buffer as ArrayBuffer
-    );
-
-    return new TextDecoder().decode(decrypted);
-  } catch (err) {
-    console.error("Decryption error:", err);
-    throw err;
-  }
-}
-
-export { encrypt, decrypt };
-\ No newline at end of file
diff --git a/apps/backend/src/utils/extractDocumentContent.ts b/apps/backend/src/utils/extractDocumentContent.ts
deleted file mode 100644
index 8b7d9256..00000000
--- a/apps/backend/src/utils/extractDocumentContent.ts
+++ /dev/null
@@ -1,87 +0,0 @@
-import * as mammoth from "mammoth";
-import { NonRetryableError } from "cloudflare:workflows";
-import { resolvePDFJS } from 'pdfjs-serverless';
-
-interface DocumentContent {
-  content: string;
-  error?: string;
-}
-
-export const extractDocumentContent = async (
-  url: string
-): Promise<DocumentContent> => {
-  try {
-    const fileExtension = url.split(".").pop()?.toLowerCase();
-
-    if (!fileExtension) {
-      throw new Error("Invalid file URL");
-    }
-
-    console.log("file", fileExtension);
-
-    switch (fileExtension) {
-      case "pdf":
-        return await extractPdfContent(url);
-      case "md":
-      case "txt":
-        return await extractTextContent(url);
-      case "doc":
-      case "docx":
-        return await extractWordContent(url);
-      default:
-        throw new NonRetryableError(`Unsupported file type: ${fileExtension}`);
-    }
-  } catch (error) {
-    return {
-      content: "",
-      error: error instanceof Error ? error.message : "Unknown error occurred",
-    };
-  }
-};
-
-async function extractPdfContent(url: string): Promise<DocumentContent> {
-  try {
-    const response = await fetch(url);
-    const arrayBuffer = await response.arrayBuffer();
-
-    // Initialize PDF.js with serverless compatibility
-    const { getDocument } = await resolvePDFJS();
-    
-    // Load the PDF document
-    const pdf = await getDocument({
-      data: arrayBuffer,
-      useSystemFonts: true,
-    }).promise;
-
-    let fullText = "";
-
-    // Extract text from each page
-    for (let i = 1; i <= pdf.numPages; i++) {
-      const page = await pdf.getPage(i);
-      const textContent = await page.getTextContent();
-      const pageText = textContent.items.map((item: any) => item.str).join(" ");
-      fullText += pageText + "\n";
-    }
-
-    return { content: fullText };
-  } catch (error) {
-    console.error("Error extracting PDF content:", error);
-    return {
-      content: "",
-      error: error instanceof Error ? error.message : "Failed to extract PDF content",
-    };
-  }
-}
-
-async function extractTextContent(url: string): Promise<DocumentContent> {
-  const response = await fetch(url);
-  const text = await response.text();
-  return { content: text };
-}
-
-async function extractWordContent(url: string): Promise<DocumentContent> {
-  const response = await fetch(url);
-  const arrayBuffer = await response.arrayBuffer();
-  const result = await mammoth.extractRawText({ arrayBuffer });
-  return { content: result.value };
-}
diff --git a/apps/backend/src/utils/extractor.ts b/apps/backend/src/utils/extractor.ts
deleted file mode 100644
index f033f8e1..00000000
--- a/apps/backend/src/utils/extractor.ts
+++ /dev/null
@@ -1,50 +0,0 @@
-import { Env } from "../types";
-
-export const extractPageContent = async (content: string, env: Env) => {
-  const resp = await fetch(`https://r.jina.ai/${content}`);
-
-  if (!resp.ok) {
-    throw new Error(
-      `Failed to fetch ${content}: ${resp.statusText}` + (await resp.text())
-    );
-  }
-
-  const metadataResp = await fetch(`https://md.dhr.wtf/metadata?url=${content}`);
-
-  if (!metadataResp.ok) {
-    throw new Error(
-      `Failed to fetch metadata for ${content}: ${metadataResp.statusText}` +
-        (await metadataResp.text())
-    );
-  }
-
-  const metadata = await metadataResp.json() as {
-    title?: string;
-    description?: string;
-    image?: string;
-    favicon?: string;
-  };
-
-  const responseText = await resp.text();
-
-  try {
-    const json:  {
-      contentToVectorize: string;
-      contentToSave: string;
-      title?: string;
-      description?: string;
-      image?: string;
-      favicon?: string;
-    } = {
-      contentToSave: responseText,
-      contentToVectorize: responseText,
-      title: metadata.title,
-      description: metadata.description,
-      image: metadata.image,
-      favicon: metadata.favicon,
-    };
-    return json;
-  } catch (e) {
-    throw new Error(`Failed to parse JSON from ${content}: ${e}`);
-  }
-};
diff --git a/apps/backend/src/utils/fetchers.ts b/apps/backend/src/utils/fetchers.ts
deleted file mode 100644
index 2329f48a..00000000
--- a/apps/backend/src/utils/fetchers.ts
+++ /dev/null
@@ -1,143 +0,0 @@
-import { WorkflowStep } from "cloudflare:workers";
-import { isErr, Ok } from "../errors/results";
-import { typeDecider } from "./typeDecider";
-import { Env, WorkflowParams } from "../types";
-import { unrollTweets } from "./tweetsToThreads";
-import { Tweet } from "react-tweet/api";
-import { NonRetryableError } from "cloudflare:workflows";
-import { extractPageContent } from "./extractor";
-import { extractDocumentContent } from "./extractDocumentContent";
-
-export const fetchContent = async (
-  params: WorkflowParams,
-  env: Env,
-  step: WorkflowStep
-) => {
-  const type = typeDecider(params.content);
-
-  if (isErr(type)) {
-    throw type.error;
-  }
-
-  switch (type.value) {
-    case "page":
-      const pageContent = await step?.do(
-        "extract page content",
-        async () => await extractPageContent(params.content, env)
-      );
-      return {
-        ...pageContent,
-        type: "page",
-      };
-
-    case "tweet":
-      const tweetUrl = new URL(params.content);
-      tweetUrl.search = ""; // Remove all search params
-      const tweetId = tweetUrl.pathname.split("/").pop();
-
-      const rawBaseTweetContent = await step.do(
-        "extract tweet content",
-        async () => {
-          const url = `https://cdn.syndication.twimg.com/tweet-result?id=${tweetId}&lang=en&features=tfw_timeline_list%3A%3Btfw_follower_count_sunset%3Atrue%3Btfw_tweet_edit_backend%3Aon%3Btfw_refsrc_session%3Aon%3Btfw_fosnr_soft_interventions_enabled%3Aon%3Btfw_show_birdwatch_pivots_enabled%3Aon%3Btfw_show_business_verified_badge%3Aon%3Btfw_duplicate_scribes_to_settings%3Aon%3Btfw_use_profile_image_shape_enabled%3Aon%3Btfw_show_blue_verified_badge%3Aon%3Btfw_legacy_timeline_sunset%3Atrue%3Btfw_show_gov_verified_badge%3Aon%3Btfw_show_business_affiliate_badge%3Aon%3Btfw_tweet_edit_frontend%3Aon&token=4c2mmul6mnh`;
-
-          const resp = await fetch(url, {
-            headers: {
-              "User-Agent":
-                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
-              Accept: "application/json",
-              "Accept-Language": "en-US,en;q=0.5",
-              "Accept-Encoding": "gzip, deflate, br",
-              Connection: "keep-alive",
-              "Upgrade-Insecure-Requests": "1",
-              "Cache-Control": "max-age=0",
-              TE: "Trailers",
-            },
-          });
-
-          const data = (await resp.json()) as Tweet;
-          return data;
-        }
-      );
-
-      let tweetContent: {
-        text: string;
-        metadata: {
-          media?: string[] | undefined;
-          links?: string[] | undefined;
-        };
-        raw: string;
-      };
-      const unrolledTweetContent = {
-        value: [rawBaseTweetContent],
-      };
-      if (true) {
-        console.error("Can't get thread, reverting back to single tweet");
-        tweetContent = {
-          text: rawBaseTweetContent.text,
-          metadata: {
-            media: [
-              ...(rawBaseTweetContent.photos?.map((url) => url.expandedUrl) ??
-                []),
-              ...(rawBaseTweetContent.video?.variants[0].src ?? []),
-            ],
-          },
-          raw: `<raw>${JSON.stringify(rawBaseTweetContent)}</raw>`,
-        };
-      } else {
-        tweetContent = {
-          text: unrolledTweetContent.value
-            .map((tweet) => tweet.text)
-            .join("\n"),
-          metadata: {
-            media: unrolledTweetContent.value.flatMap((tweet) => [
-              ...tweet.videos,
-              ...tweet.images,
-            ]),
-            links: unrolledTweetContent.value.flatMap((tweet) => tweet.links),
-          },
-          raw: `<raw>${JSON.stringify(rawBaseTweetContent)}</raw>`,
-        };
-      }
-
-      // make it the same type as the page content
-      const pageContentType: Awaited<ReturnType<typeof extractPageContent>> & {
-        type: string;
-      } = {
-        contentToVectorize:
-          tweetContent.text +
-          "\n\nMetadata for this tweet:\n" +
-          JSON.stringify(tweetContent.metadata) +
-          "\n\nRaw tweet data:\n" +
-          tweetContent.raw,
-        contentToSave: tweetContent.raw,
-        title: "",
-        description: JSON.stringify(tweetContent.metadata),
-        image: "",
-        favicon: "",
-        type: "tweet",
-      };
-      return pageContentType;
-    case "note":
-      const noteContent = {
-        contentToVectorize: params.content,
-        // TODO: different when using platejs
-        contentToSave: params.content,
-        // title is the first 30 characters of the first line
-        title: params.content.split("\n")[0].slice(0, 30),
-        type: "note",
-      };
-      return noteContent;
-    case "document":
-      const documentContent = await step.do(
-        "extract document content",
-        async () => await extractDocumentContent(params.content)
-      );
-      return {
-        contentToVectorize: documentContent.content,
-        contentToSave: documentContent.content,
-        type: "document",
-      };
-    default:
-      throw new NonRetryableError("Unknown content type");
-  }
-};
diff --git a/apps/backend/src/utils/notion.ts b/apps/backend/src/utils/notion.ts
deleted file mode 100644
index ebe559e1..00000000
--- a/apps/backend/src/utils/notion.ts
+++ /dev/null
@@ -1,239 +0,0 @@
-interface PageContent {
-  content: string;
-  url: string;
-  title: string;
-  id: string;
-  createdAt: string;
-}
-
-interface NotionBlock {
-  type: string;
-  [key: string]: any;
-}
-
-interface SearchResponse {
-  results: {
-    id: string;
-    object: string;
-    url: string;
-    created_time: string;
-    properties: {
-      title?: {
-        title: Array<{
-          plain_text: string;
-        }>;
-      };
-      Name?: {
-        title: Array<{
-          plain_text: string;
-        }>;
-      };
-    };
-  }[];
-  next_cursor: string | undefined;
-  has_more: boolean;
-}
-
-interface BlockResponse {
-  results: NotionBlock[];
-  next_cursor: string | undefined;
-  has_more: boolean;
-}
-
-export const getAllNotionPageContents = async (
-  token: string,
-  onProgress: (progress: number) => Promise<void>
-): Promise<PageContent[]> => {
-  const pages: PageContent[] = [];
-  const NOTION_API_VERSION = "2022-06-28";
-  const BASE_URL = "https://api.notion.com/v1";
-  const MAX_RETRIES = 3;
-  const BATCH_SIZE = 10; // Number of concurrent requests
-  const PAGE_SIZE = 100; // Number of pages to fetch per search request
-
-  const delay = (ms: number) =>
-    new Promise((resolve) => setTimeout(resolve, ms));
-
-  const notionFetch = async (
-    endpoint: string,
-    options: RequestInit = {},
-    retries = 0
-  ): Promise<any> => {
-    try {
-      const response = await fetch(`${BASE_URL}${endpoint}`, {
-        ...options,
-        headers: {
-          Authorization: `Bearer ${token}`,
-          "Notion-Version": NOTION_API_VERSION,
-          "Content-Type": "application/json",
-          ...((options.headers || {}) as Record<string, string>),
-        },
-      });
-
-      if (response.status === 429) {
-        // Rate limit error
-        const retryAfter = parseInt(response.headers.get("Retry-After") || "5");
-        if (retries < MAX_RETRIES) {
-          await delay(retryAfter * 1000);
-          return notionFetch(endpoint, options, retries + 1);
-        }
-      }
-
-      if (!response.ok) {
-        const errorText = await response.text();
-        throw new Error(
-          `Notion API error: ${response.statusText}\n${errorText}`
-        );
-      }
-
-      return response.json();
-    } catch (error) {
-      if (retries < MAX_RETRIES) {
-        await delay(2000 * (retries + 1)); // Exponential backoff
-        return notionFetch(endpoint, options, retries + 1);
-      }
-      throw error;
-    }
-  };
-
-  const convertBlockToMarkdown = (block: NotionBlock): string => {
-    switch (block.type) {
-      case "paragraph":
-        return (
-          block.paragraph?.rich_text
-            ?.map((text: any) => text.plain_text)
-            .join("") || ""
-        );
-      case "heading_1":
-        return `# ${block.heading_1?.rich_text
-          ?.map((text: any) => text.plain_text)
-          .join("")}\n`;
-      case "heading_2":
-        return `## ${block.heading_2?.rich_text
-          ?.map((text: any) => text.plain_text)
-          .join("")}\n`;
-      case "heading_3":
-        return `### ${block.heading_3?.rich_text
-          ?.map((text: any) => text.plain_text)
-          .join("")}\n`;
-      case "bulleted_list_item":
-        return `* ${block.bulleted_list_item?.rich_text
-          ?.map((text: any) => text.plain_text)
-          .join("")}\n`;
-      case "numbered_list_item":
-        return `1. ${block.numbered_list_item?.rich_text
-          ?.map((text: any) => text.plain_text)
-          .join("")}\n`;
-      case "to_do":
-        const checked = block.to_do?.checked ? "x" : " ";
-        return `- [${checked}] ${block.to_do?.rich_text
-          ?.map((text: any) => text.plain_text)
-          .join("")}\n`;
-      case "code":
-        return `\`\`\`${block.code?.language || ""}\n${block.code?.rich_text
-          ?.map((text: any) => text.plain_text)
-          .join("")}\n\`\`\`\n`;
-      case "quote":
-        return `> ${block.quote?.rich_text
-          ?.map((text: any) => text.plain_text)
-          .join("")}\n`;
-      default:
-        return "";
-    }
-  };
-
-  const getAllBlocks = async (pageId: string): Promise<NotionBlock[]> => {
-    const blocks: NotionBlock[] = [];
-    let cursor: string | undefined = undefined;
-
-    do {
-      const endpoint = `/blocks/${pageId}/children${
-        cursor ? `?start_cursor=${cursor}` : ""
-      }`;
-      const response = (await notionFetch(endpoint)) as BlockResponse;
-      blocks.push(...response.results);
-      cursor = response.next_cursor;
-    } while (cursor);
-
-    return blocks;
-  };
-
-  try {
-    let hasMore = true;
-    let cursor: string | undefined = undefined;
-    let allPages: SearchResponse["results"] = [];
-
-    // First, collect all pages
-    while (hasMore) {
-      const searchResponse = (await notionFetch("/search", {
-        method: "POST",
-        body: JSON.stringify({
-          filter: {
-            value: "page",
-            property: "object",
-          },
-          sort: {
-            direction: "ascending",
-            timestamp: "last_edited_time",
-          },
-          start_cursor: cursor,
-          page_size: PAGE_SIZE,
-        }),
-      })) as SearchResponse;
-
-      allPages = [...allPages, ...searchResponse.results];
-      cursor = searchResponse.next_cursor;
-      hasMore = searchResponse.has_more;
-
-      // Report progress for page collection (0-30%)
-      const progressPercent = (allPages.length / (allPages.length + searchResponse.results.length)) * 30;
-      await onProgress(progressPercent);
-    }
-
-    // Process pages in parallel batches
-    for (let i = 0; i < allPages.length; i += BATCH_SIZE) {
-      const batch = allPages.slice(i, i + BATCH_SIZE);
-      const batchResults = await Promise.all(
-        batch.map(async (page) => {
-          try {
-            const blocks = await getAllBlocks(page.id);
-            const pageContent = {
-              content: blocks.map(convertBlockToMarkdown).join("\n"),
-              url: page.url || `https://notion.so/${page.id.replace(/-/g, "")}`,
-              title:
-                page.properties?.Name?.title?.[0]?.plain_text ||
-                page.properties?.title?.title?.[0]?.plain_text ||
-                "Untitled",
-              id: page.id,
-              createdAt: page.created_time,
-            };
-            return pageContent.content.length > 10 ? pageContent : null;
-          } catch (error) {
-            console.error(`Error processing page ${page.id}:`, error);
-            return null;
-          }
-        })
-      );
-
-      pages.push(
-        ...batchResults.filter(
-          (result): result is PageContent => result !== null
-        )
-      );
-
-      // Report progress for page processing (30-100%)
-      const progressPercent = 30 + ((i + BATCH_SIZE) / allPages.length) * 70;
-      await onProgress(Math.min(progressPercent, 100));
-
-      // Add a small delay between batches to respect rate limits
-      if (i + BATCH_SIZE < allPages.length) {
-        await delay(1000);
-      }
-    }
-
-    return pages.filter((page) => page.content.length > 10);
-  } catch (error) {
-    console.error("Error fetching Notion pages:", error);
-    throw error;
-  }
-};
diff --git a/apps/backend/src/utils/tweetsToThreads.ts b/apps/backend/src/utils/tweetsToThreads.ts
deleted file mode 100644
index 85f69b87..00000000
--- a/apps/backend/src/utils/tweetsToThreads.ts
+++ /dev/null
@@ -1,108 +0,0 @@
-import * as cheerio from "cheerio";
-import { BaseError } from "../errors/baseError";
-import { Ok, Result } from "../errors/results";
-
-interface Tweet {
-  id: string;
-  text: string;
-  links: Array<string>;
-  images: Array<string>;
-  videos: Array<string>;
-}
-
-class ProcessTweetsError extends BaseError {
-  constructor(message?: string, source?: string) {
-    super("[Thread Proceessing Error]", message, source);
-  }
-}
-
-type TweetProcessResult = Array<Tweet>;
-
-// there won't be a need for url caching right?
-export async function unrollTweets(
-  url: string
-): Promise<Result<TweetProcessResult, ProcessTweetsError>> {
-  const tweetId = url.split("/").pop();
-  const response = await fetch(`https://unrollnow.com/status/${tweetId}`, {
-    headers: {
-      "User-Agent":
-        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
-      "Cache-Control": "max-age=3600",
-    },
-  });
-
-  if (!response.ok) {
-    const error = await response.text();
-    console.error(error);
-    throw new Error(`HTTP error! status: ${response.status} - ${error}`);
-  }
-
-  const html = await response.text();
-  const $ = cheerio.load(html);
-  const tweets: Array<Tweet> = [];
-
-  const urlRegex = /(https?:\/\/\S+)/g;
-  const paragraphs = $(".mainarticle p").toArray();
-
-  const processedTweets = await Promise.all(
-    paragraphs.map(async (element, i) => {
-      const $tweet = $(element);
-      let tweetText = $tweet.text().trim();
-      if (tweetText.length < 1) {
-        return null;
-      }
-
-      if (i === paragraphs.length - 1 && tweetText.toLowerCase() === "yes") {
-        return null;
-      }
-
-      const shortUrls = tweetText.match(urlRegex) || [];
-      console.log("SHORT_URLS_LEN", shortUrls.length);
-      console.log("SHORT_URLS", shortUrls);
-
-      const expandedUrls = await Promise.all(shortUrls.map(expandShortUrl));
-
-      tweetText = tweetText.replace(urlRegex, "").trim().replace(/\s+/g, " ");
-
-      const images = $tweet
-        .nextUntil("p")
-        .find("img.tweetimg")
-        .map((i, img) => $(img).attr("src"))
-        .get();
-
-      const videos = $tweet
-        .nextUntil("p")
-        .find("video > source")
-        .map((i, vid) => $(vid).attr("src"))
-        .get();
-
-      return {
-        id: `${tweetId}_${i}`,
-        text: tweetText,
-        links: expandedUrls,
-        images: images,
-        videos: videos,
-      };
-    })
-  );
-
-  tweets.push(
-    ...processedTweets.filter((tweet): tweet is Tweet => tweet !== null)
-  );
-
-  return Ok(tweets);
-}
-
-async function expandShortUrl(shortUrl: string): Promise<string> {
-  try {
-    const response = await fetch(shortUrl, {
-      method: "HEAD",
-      redirect: "follow",
-    });
-    const expandedUrl = response.url;
-    return expandedUrl;
-  } catch (error) {
-    console.error(`Failed to expand URL: ${shortUrl}`, error);
-    return shortUrl;
-  }
-}
diff --git a/apps/backend/src/utils/typeDecider.ts b/apps/backend/src/utils/typeDecider.ts
deleted file mode 100644
index 642b178e..00000000
--- a/apps/backend/src/utils/typeDecider.ts
+++ /dev/null
@@ -1,41 +0,0 @@
-import { Result, Ok, Err } from "../errors/results";
-import { BaseError } from "../errors/baseError";
-
-export type contentType = "page" | "tweet" | "note" | "document" | "notion";
-
-class GetTypeError extends BaseError {
-  constructor(message?: string, source?: string) {
-    super("[Decide Type Error]", message, source);
-  }
-}
-export const typeDecider = (
-  content: string
-): Result<contentType, GetTypeError> => {
-  try {
-    // if the content is a URL, then it's a page. if its a URL with https://x.com/user/status/123, then it's a tweet.
-    // if it ends with .pdf etc then it's a document. else, it's a note.
-    // do strict checking with regex
-    if (
-      content.match(/https?:\/\/(x\.com|twitter\.com)\/[\w]+\/[\w]+\/[\d]+/)
-    ) {
-      return Ok("tweet");
-    } else if (content.match(/\.(pdf|doc|docx|txt|rtf|odt|md)/i)) {
-      return Ok("document"); 
-    } else if (
-      content.match(/https?:\/\/(www\.)?notion\.so\/.*/)
-    ) {
-      return Ok("notion");
-    } else if (
-      content.match(
-        /^(https?:\/\/)?(www\.)?[a-z0-9]+([-.]{1}[a-z0-9]+)*\.[a-z]{2,5}(\/.*)?$/i
-      )
-    ) {
-      return Ok("page");
-    } else {
-      return Ok("note");
-    }
-  } catch (e) {
-    console.error("[Decide Type Error]", e);
-    return Err(new GetTypeError((e as Error).message, "typeDecider"));
-  }
-};
author	Mahesh Sanikommmu <[email protected]>	2025-08-16 18:50:10 -0700
committer	Mahesh Sanikommmu <[email protected]>	2025-08-16 18:50:10 -0700
commit	39003aff23d64ff1d96074d71521f6023c9bec01 (patch)
tree	3f870c04b3dce315bba1b21aa2da158494e71774 /apps/backend/src/utils
parent	Merge pull request #355 from supermemoryai/archive (diff)
download	supermemory-39003aff23d64ff1d96074d71521f6023c9bec01.tar.xz supermemory-39003aff23d64ff1d96074d71521f6023c9bec01.zip