diff options
| author | Dhravya Shah <[email protected]> | 2026-01-23 17:42:47 -0700 |
|---|---|---|
| committer | Dhravya Shah <[email protected]> | 2026-01-23 17:42:47 -0700 |
| commit | 4ca0f593a5d89695e101569f09debda5617c0ec6 (patch) | |
| tree | 60517a8e898965cf8120cc01c56f69baaff0d06e /apps/web/app/api | |
| parent | extract metadata ourselves (diff) | |
| parent | fix: cf build (#700) (diff) | |
| download | supermemory-4ca0f593a5d89695e101569f09debda5617c0ec6.tar.xz supermemory-4ca0f593a5d89695e101569f09debda5617c0ec6.zip | |
fix: merge conflicts
Diffstat (limited to 'apps/web/app/api')
| -rw-r--r-- | apps/web/app/api/og/route.ts | 77 |
1 files changed, 77 insertions, 0 deletions
```diff
diff --git a/apps/web/app/api/og/route.ts b/apps/web/app/api/og/route.ts
index 97f024a5..4c61ebe5 100644
--- a/apps/web/app/api/og/route.ts
+++ b/apps/web/app/api/og/route.ts
@@ -37,6 +37,70 @@ function isPrivateHost(hostname: string): boolean {
 	return privateIpPatterns.some((pattern) => pattern.test(hostname))
 }
 
+// File extensions that are not HTML and can't be scraped for OG data
+const NON_HTML_EXTENSIONS = [
+	".pdf",
+	".doc",
+	".docx",
+	".xls",
+	".xlsx",
+	".ppt",
+	".pptx",
+	".zip",
+	".rar",
+	".7z",
+	".tar",
+	".gz",
+	".mp3",
+	".mp4",
+	".avi",
+	".mov",
+	".wmv",
+	".flv",
+	".webm",
+	".wav",
+	".ogg",
+	".jpg",
+	".jpeg",
+	".png",
+	".gif",
+	".webp",
+	".svg",
+	".ico",
+	".bmp",
+	".tiff",
+	".exe",
+	".dmg",
+	".iso",
+	".bin",
+]
+
+function isNonHtmlUrl(url: string): boolean {
+	try {
+		const urlObj = new URL(url)
+		const pathname = urlObj.pathname.toLowerCase()
+		return NON_HTML_EXTENSIONS.some((ext) => pathname.endsWith(ext))
+	} catch {
+		return false
+	}
+}
+
+function extractImageUrl(image: unknown): string | undefined {
+	if (!image) return undefined
+
+	if (typeof image === "string") {
+		return image
+	}
+
+	if (Array.isArray(image) && image.length > 0) {
+		const first = image[0]
+		if (first && typeof first === "object" && "url" in first) {
+			return String(first.url)
+		}
+	}
+	return ""
+}
+
 function extractMetaTag(html: string, patterns: RegExp[]): string {
 	for (const pattern of patterns) {
 		const match = html.match(pattern)
@@ -101,6 +165,19 @@ export async function GET(request: Request) {
 		)
 	}
 
+	// Skip OG scraping for non-HTML files (PDFs, images, etc.)
+	if (isNonHtmlUrl(trimmedUrl)) {
+		return Response.json(
+			{ title: "", description: "" },
+			{
+				headers: {
+					"Cache-Control":
+						"public, s-maxage=3600, stale-while-revalidate=86400",
+				},
+			},
+		)
+	}
+
 	const controller = new AbortController()
 	const timeoutId = setTimeout(() => controller.abort(), 8000)
 
```