diff options
| author | Dhravya Shah <[email protected]> | 2026-01-23 17:39:23 -0700 |
|---|---|---|
| committer | Dhravya Shah <[email protected]> | 2026-01-23 17:39:23 -0700 |
| commit | 35004c474ad021f4772bd4ba4da41a4d5d9a9b2e (patch) | |
| tree | 85e0232060bb979cb44ae022729c6198540bf3e0 /apps/web/app/api | |
| parent | chore: bump package versions (diff) | |
| download | supermemory-35004c474ad021f4772bd4ba4da41a4d5d9a9b2e.tar.xz supermemory-35004c474ad021f4772bd4ba4da41a4d5d9a9b2e.zip | |
extract metadata ourselves
Diffstat (limited to 'apps/web/app/api')
| -rw-r--r-- | apps/web/app/api/og/route.ts | 107 |
1 files changed, 60 insertions, 47 deletions
diff --git a/apps/web/app/api/og/route.ts b/apps/web/app/api/og/route.ts index 5ca6e44c..97f024a5 100644 --- a/apps/web/app/api/og/route.ts +++ b/apps/web/app/api/og/route.ts @@ -1,6 +1,4 @@ -import ogs from "open-graph-scraper" - -export const runtime = "nodejs" +export const runtime = "edge" interface OGResponse { title: string @@ -20,7 +18,6 @@ function isValidUrl(urlString: string): boolean { function isPrivateHost(hostname: string): boolean { const lowerHost = hostname.toLowerCase() - // Block localhost variants if ( lowerHost === "localhost" || lowerHost === "127.0.0.1" || @@ -31,7 +28,6 @@ function isPrivateHost(hostname: string): boolean { return true } - // Block RFC 1918 private IP ranges const privateIpPatterns = [ /^10\./, /^172\.(1[6-9]|2[0-9]|3[01])\./, @@ -41,25 +37,20 @@ function isPrivateHost(hostname: string): boolean { return privateIpPatterns.some((pattern) => pattern.test(hostname)) } -function extractImageUrl(image: unknown): string | undefined { - if (!image) return undefined - - if (typeof image === "string") { - return image - } - - if (Array.isArray(image) && image.length > 0) { - const first = image[0] - if (first && typeof first === "object" && "url" in first) { - return String(first.url) +function extractMetaTag(html: string, patterns: RegExp[]): string { + for (const pattern of patterns) { + const match = html.match(pattern) + if (match?.[1]) { + return match[1] + .replace(/&/g, "&") + .replace(/</g, "<") + .replace(/>/g, ">") + .replace(/"/g, '"') + .replace(/'/g, "'") + .trim() } } - - if (typeof image === "object" && image !== null && "url" in image) { - return String(image.url) - } - - return undefined + return "" } function resolveImageUrl( @@ -110,46 +101,68 @@ export async function GET(request: Request) { ) } - const { result, error } = await ogs({ - url: trimmedUrl, - timeout: 8000, - fetchOptions: { - headers: { - "User-Agent": - "Mozilla/5.0 (compatible; SuperMemory/1.0; +https://supermemory.ai)", - }, + const controller = new AbortController() + const timeoutId = setTimeout(() => controller.abort(), 8000) + + const response = await fetch(trimmedUrl, { + signal: controller.signal, + headers: { + "User-Agent": + "Mozilla/5.0 (compatible; SuperMemory/1.0; +https://supermemory.ai)", }, }) - if (error || !result) { - console.error("OG scraping error:", error) + clearTimeout(timeoutId) + + if (!response.ok) { return Response.json( - { error: "Failed to fetch Open Graph data" }, - { status: 500 }, + { error: "Failed to fetch URL" }, + { status: response.status }, ) } - const ogTitle = result.ogTitle || result.twitterTitle || "" - const ogDescription = - result.ogDescription || result.twitterDescription || "" - - const ogImageUrl = - extractImageUrl(result.ogImage) || extractImageUrl(result.twitterImage) - - const resolvedImageUrl = resolveImageUrl(ogImageUrl, trimmedUrl) - - const response: OGResponse = { - title: ogTitle, - description: ogDescription, + const html = await response.text() + + const titlePatterns = [ + /<meta\s+property=["']og:title["']\s+content=["']([^"']+)["']/i, + /<meta\s+content=["']([^"']+)["']\s+property=["']og:title["']/i, + /<meta\s+name=["']twitter:title["']\s+content=["']([^"']+)["']/i, + /<title>([^<]+)<\/title>/i, + ] + + const descriptionPatterns = [ + /<meta\s+property=["']og:description["']\s+content=["']([^"']+)["']/i, + /<meta\s+content=["']([^"']+)["']\s+property=["']og:description["']/i, + /<meta\s+name=["']twitter:description["']\s+content=["']([^"']+)["']/i, + /<meta\s+name=["']description["']\s+content=["']([^"']+)["']/i, + ] + + const imagePatterns = [ + /<meta\s+property=["']og:image["']\s+content=["']([^"']+)["']/i, + /<meta\s+content=["']([^"']+)["']\s+property=["']og:image["']/i, + /<meta\s+name=["']twitter:image["']\s+content=["']([^"']+)["']/i, + ] + + const title = extractMetaTag(html, titlePatterns) + const description = extractMetaTag(html, descriptionPatterns) + const imageUrl = extractMetaTag(html, imagePatterns) + const resolvedImageUrl = resolveImageUrl(imageUrl, trimmedUrl) + + const ogResponse: OGResponse = { + title, + description, ...(resolvedImageUrl && { image: resolvedImageUrl }), } - return Response.json(response, { + return Response.json(ogResponse, { headers: { "Cache-Control": "public, s-maxage=3600, stale-while-revalidate=86400", }, }) } catch (error) { + if (error instanceof Error && error.name === "AbortError") { + return Response.json({ error: "Request timeout" }, { status: 504 }) + } console.error("OG route error:", error) return Response.json({ error: "Internal server error" }, { status: 500 }) } |