From 35004c474ad021f4772bd4ba4da41a4d5d9a9b2e Mon Sep 17 00:00:00 2001 From: Dhravya Shah Date: Fri, 23 Jan 2026 17:39:23 -0700 Subject: extract metadata ourselves --- apps/web/app/api/og/route.ts | 107 ++++++++++++--------- apps/web/components/new/chat/index.tsx | 4 +- .../components/new/document-cards/file-preview.tsx | 6 +- .../new/document-cards/google-docs-preview.tsx | 6 +- apps/web/components/new/document-icon.tsx | 8 +- .../new/document-modal/content/google-doc.tsx | 5 +- .../new/document-modal/content/index.tsx | 9 +- .../new/document-modal/graph-list-memories.tsx | 4 +- apps/web/lib/analytics.ts | 5 +- apps/web/package.json | 1 - 10 files changed, 82 insertions(+), 73 deletions(-) diff --git a/apps/web/app/api/og/route.ts b/apps/web/app/api/og/route.ts index 5ca6e44c..97f024a5 100644 --- a/apps/web/app/api/og/route.ts +++ b/apps/web/app/api/og/route.ts @@ -1,6 +1,4 @@ -import ogs from "open-graph-scraper" - -export const runtime = "nodejs" +export const runtime = "edge" interface OGResponse { title: string @@ -20,7 +18,6 @@ function isValidUrl(urlString: string): boolean { function isPrivateHost(hostname: string): boolean { const lowerHost = hostname.toLowerCase() - // Block localhost variants if ( lowerHost === "localhost" || lowerHost === "127.0.0.1" || @@ -31,7 +28,6 @@ function isPrivateHost(hostname: string): boolean { return true } - // Block RFC 1918 private IP ranges const privateIpPatterns = [ /^10\./, /^172\.(1[6-9]|2[0-9]|3[01])\./, @@ -41,25 +37,20 @@ function isPrivateHost(hostname: string): boolean { return privateIpPatterns.some((pattern) => pattern.test(hostname)) } -function extractImageUrl(image: unknown): string | undefined { - if (!image) return undefined - - if (typeof image === "string") { - return image - } - - if (Array.isArray(image) && image.length > 0) { - const first = image[0] - if (first && typeof first === "object" && "url" in first) { - return String(first.url) +function extractMetaTag(html: string, patterns: RegExp[]): string { + for (const pattern of patterns) { + const match = html.match(pattern) + if (match?.[1]) { + return match[1] + .replace(/&/g, "&") + .replace(/</g, "<") + .replace(/>/g, ">") + .replace(/"/g, '"') + .replace(/'/g, "'") + .trim() } } - - if (typeof image === "object" && image !== null && "url" in image) { - return String(image.url) - } - - return undefined + return "" } function resolveImageUrl( @@ -110,46 +101,68 @@ export async function GET(request: Request) { ) } - const { result, error } = await ogs({ - url: trimmedUrl, - timeout: 8000, - fetchOptions: { - headers: { - "User-Agent": - "Mozilla/5.0 (compatible; SuperMemory/1.0; +https://supermemory.ai)", - }, + const controller = new AbortController() + const timeoutId = setTimeout(() => controller.abort(), 8000) + + const response = await fetch(trimmedUrl, { + signal: controller.signal, + headers: { + "User-Agent": + "Mozilla/5.0 (compatible; SuperMemory/1.0; +https://supermemory.ai)", }, }) - if (error || !result) { - console.error("OG scraping error:", error) + clearTimeout(timeoutId) + + if (!response.ok) { return Response.json( - { error: "Failed to fetch Open Graph data" }, - { status: 500 }, + { error: "Failed to fetch URL" }, + { status: response.status }, ) } - const ogTitle = result.ogTitle || result.twitterTitle || "" - const ogDescription = - result.ogDescription || result.twitterDescription || "" - - const ogImageUrl = - extractImageUrl(result.ogImage) || extractImageUrl(result.twitterImage) - - const resolvedImageUrl = resolveImageUrl(ogImageUrl, trimmedUrl) - - const response: OGResponse = { - title: ogTitle, - description: ogDescription, + const html = await response.text() + + const titlePatterns = [ + /([^<]+)<\/title>/i, + ] + + const descriptionPatterns = [ + / - Chat with Nova + + Chat with Nova + ) : ( diff --git a/apps/web/components/new/document-cards/file-preview.tsx b/apps/web/components/new/document-cards/file-preview.tsx index f30645dc..44c2476b 100644 --- a/apps/web/components/new/document-cards/file-preview.tsx +++ b/apps/web/components/new/document-cards/file-preview.tsx @@ -86,7 +86,11 @@ export function FilePreview({ document }: { document: DocumentWithMemories }) { ) : (
- +

- +

{label}

diff --git a/apps/web/components/new/document-icon.tsx b/apps/web/components/new/document-icon.tsx index a2a502e1..4861e978 100644 --- a/apps/web/components/new/document-icon.tsx +++ b/apps/web/components/new/document-icon.tsx @@ -53,13 +53,7 @@ function getFaviconUrl(url: string): string { } } -function FaviconIcon({ - url, - className, -}: { - url: string - className?: string -}) { +function FaviconIcon({ url, className }: { url: string; className?: string }) { const [hasError, setHasError] = useState(false) const faviconUrl = getFaviconUrl(url) diff --git a/apps/web/components/new/document-modal/content/google-doc.tsx b/apps/web/components/new/document-modal/content/google-doc.tsx index 562bac12..78dc4359 100644 --- a/apps/web/components/new/document-modal/content/google-doc.tsx +++ b/apps/web/components/new/document-modal/content/google-doc.tsx @@ -2,10 +2,7 @@ import { useState } from "react" import { Loader2 } from "lucide-react" -import { - extractGoogleDocId, - getGoogleEmbedUrl, -} from "@/lib/url-helpers" +import { extractGoogleDocId, getGoogleEmbedUrl } from "@/lib/url-helpers" interface GoogleDocViewerProps { url: string | null | undefined diff --git a/apps/web/components/new/document-modal/content/index.tsx b/apps/web/components/new/document-modal/content/index.tsx index c06bc550..39c5a2f0 100644 --- a/apps/web/components/new/document-modal/content/index.tsx +++ b/apps/web/components/new/document-modal/content/index.tsx @@ -56,10 +56,7 @@ function getContentType(document: DocumentWithMemories | null): ContentType { document.metadata?.mimeType?.toString().startsWith("image/") if (isImage && document.url) return "image" - if ( - document.type === "tweet" || - (document.url && isTwitterUrl(document.url)) - ) + if (document.type === "tweet" || (document.url && isTwitterUrl(document.url))) return "tweet" if (document.type === "text") return "text" if (document.type === "pdf") return "pdf" @@ -83,9 +80,7 @@ export function DocumentContent({ switch (contentType) { case "image": - return ( - - ) + return case "tweet": return ( diff --git a/apps/web/components/new/document-modal/graph-list-memories.tsx b/apps/web/components/new/document-modal/graph-list-memories.tsx index 49f918c2..0c2e418f 100644 --- a/apps/web/components/new/document-modal/graph-list-memories.tsx +++ b/apps/web/components/new/document-modal/graph-list-memories.tsx @@ -286,9 +286,7 @@ export function GraphListMemories({ type="button" className={cn( "text-xs text-[#525D6E] cursor-pointer transition-all text-left w-full", - expandedMemories.has(memory.id) - ? "" - : "line-clamp-2", + expandedMemories.has(memory.id) ? "" : "line-clamp-2", )} onClick={() => toggleMemory(memory.id)} > diff --git a/apps/web/lib/analytics.ts b/apps/web/lib/analytics.ts index 9bc3b7f5..84eda62b 100644 --- a/apps/web/lib/analytics.ts +++ b/apps/web/lib/analytics.ts @@ -1,7 +1,10 @@ import posthog from "posthog-js" // Helper function to safely capture events -const safeCapture = (eventName: string, properties?: Record) => { +const safeCapture = ( + eventName: string, + properties?: Record, +) => { if (posthog.__loaded) { posthog.capture(eventName, properties) } diff --git a/apps/web/package.json b/apps/web/package.json index 27ca7aa4..178b2e2b 100644 --- a/apps/web/package.json +++ b/apps/web/package.json @@ -80,7 +80,6 @@ "next": "16.0.9", "next-themes": "^0.4.6", "nuqs": "^2.5.2", - "open-graph-scraper": "^6.11.0", "pdfjs-dist": "5.4.296", "posthog-js": "^1.257.0", "random-word-slugs": "^0.1.7", -- cgit v1.2.3