From 8d8d6d91aa8cdec49e7e6ef87cbc5622d0bc16ed Mon Sep 17 00:00:00 2001 From: MaheshtheDev <38828053+MaheshtheDev@users.noreply.github.com> Date: Mon, 3 Nov 2025 00:41:58 +0000 Subject: feat(browser-extension): webpages capture with markdown conversion (#548) Improved browser extension memory saving with better content handling and added markdown conversion. ### What changed? - Enhanced memory content handling in the background script to prioritize different content types (explicit content, highlighted text, markdown, HTML, or URL) - Added HTML to markdown conversion using TurndownService when saving entire pages - Improved HTML handling by removing script tags before processing - Updated the web app to display the saved URL from metadata when available - Added turndown library and its type definitions as dependencies --- apps/browser-extension/entrypoints/background.ts | 32 +++++++++-- .../entrypoints/content/shared.ts | 64 +++++++++++++++++++--- 2 files changed, 85 insertions(+), 11 deletions(-) (limited to 'apps/browser-extension/entrypoints') diff --git a/apps/browser-extension/entrypoints/background.ts b/apps/browser-extension/entrypoints/background.ts index 7461af37..131207c2 100644 --- a/apps/browser-extension/entrypoints/background.ts +++ b/apps/browser-extension/entrypoints/background.ts @@ -117,12 +117,36 @@ export default defineBackground(() => { console.warn("Failed to get default project, using fallback:", error) } + let content: string + if (data.content) { + content = data.content + } else if (data.highlightedText) { + content = `${data.highlightedText}\n\n${data?.url || ""}` + } else if (data.markdown) { + content = `${data.markdown}\n\n${data?.url || ""}` + } else if (data.html) { + content = `${data.html}\n\n${data?.url || ""}` + } else { + content = data?.url || "" + } + + const metadata: MemoryPayload["metadata"] = { + sm_source: "consumer", + website_url: data.url, + } + + if (data.ogImage) { + metadata.website_og_image = data.ogImage + } + + if (data.title) { + metadata.website_title = data.title + } + const payload: MemoryPayload = { containerTags: [containerTag], - content: - data.content || - `${data.highlightedText}\n\n${data.html}\n\n${data?.url}`, - metadata: { sm_source: "consumer" }, + content, + metadata, } const responseData = await saveMemory(payload) diff --git a/apps/browser-extension/entrypoints/content/shared.ts b/apps/browser-extension/entrypoints/content/shared.ts index d8b665c5..8c3688e0 100644 --- a/apps/browser-extension/entrypoints/content/shared.ts +++ b/apps/browser-extension/entrypoints/content/shared.ts @@ -1,5 +1,6 @@ import { MESSAGE_TYPES, STORAGE_KEYS } from "../../utils/constants" import { DOMUtils } from "../../utils/ui-components" +import { default as TurndownService } from "turndown" export async function saveMemory() { try { @@ -7,15 +8,64 @@ export async function saveMemory() { const highlightedText = window.getSelection()?.toString() || "" const url = window.location.href - const html = document.documentElement.outerHTML + + const ogImage = + document + .querySelector('meta[property="og:image"]') + ?.getAttribute("content") || + document + .querySelector('meta[name="og:image"]') + ?.getAttribute("content") || + undefined + + const title = + document + .querySelector('meta[property="og:title"]') + ?.getAttribute("content") || + document + .querySelector('meta[name="og:title"]') + ?.getAttribute("content") || + document.title || + undefined + + const data: { + html?: string + markdown?: string + highlightedText?: string + url: string + ogImage?: string + title?: string + } = { + url, + } + + if (ogImage) { + data.ogImage = ogImage + } + + if (title) { + data.title = title + } + + if (highlightedText) { + data.highlightedText = highlightedText + } else { + const bodyClone = document.body.cloneNode(true) as HTMLElement + const scripts = bodyClone.querySelectorAll("script") + for (const script of scripts) { + script.remove() + } + const html = bodyClone.innerHTML + + // Convert HTML to markdown + const turndownService = new TurndownService() + const markdown = turndownService.turndown(html) + data.markdown = markdown + } const response = await browser.runtime.sendMessage({ action: MESSAGE_TYPES.SAVE_MEMORY, - data: { - html, - highlightedText, - url, - }, + data, actionSource: "context_menu", }) @@ -74,4 +124,4 @@ export function setupStorageListener() { ) } }) -} \ No newline at end of file +} -- cgit v1.2.3