aboutsummaryrefslogtreecommitdiff
path: root/apps/web/app/api
diff options
context:
space:
mode:
author Dhravya Shah <[email protected]> 2026-01-23 17:42:47 -0700
committer Dhravya Shah <[email protected]> 2026-01-23 17:42:47 -0700
commit 4ca0f593a5d89695e101569f09debda5617c0ec6 (patch)
tree 60517a8e898965cf8120cc01c56f69baaff0d06e /apps/web/app/api
parent extract metadata ourselves (diff)
parent fix: cf build (#700) (diff)
download supermemory-4ca0f593a5d89695e101569f09debda5617c0ec6.tar.xz
supermemory-4ca0f593a5d89695e101569f09debda5617c0ec6.zip
fix: merge conflicts
Diffstat (limited to 'apps/web/app/api')
-rw-r--r-- apps/web/app/api/og/route.ts 77
1 files changed, 77 insertions, 0 deletions
diff --git a/apps/web/app/api/og/route.ts b/apps/web/app/api/og/route.ts
index 97f024a5..4c61ebe5 100644
--- a/apps/web/app/api/og/route.ts
+++ b/apps/web/app/api/og/route.ts
@@ -37,6 +37,70 @@ function isPrivateHost(hostname: string): boolean {
return privateIpPatterns.some((pattern) => pattern.test(hostname))
}
+/**
+ * Lowercase path suffixes that identify a URL as pointing at a non-HTML
+ * resource. Such resources carry no Open Graph markup, so scraping them
+ * for OG data is pointless.
+ */
+const NON_HTML_EXTENSIONS = [
+	// Documents
+	".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",
+	// Archives and compressed files
+	".zip", ".rar", ".7z", ".tar", ".gz",
+	// Audio and video
+	".mp3", ".mp4", ".avi", ".mov", ".wmv", ".flv", ".webm", ".wav", ".ogg",
+	// Images
+	".jpg", ".jpeg", ".png", ".gif", ".webp", ".svg", ".ico", ".bmp", ".tiff",
+	// Executables, disk images and raw binaries
+	".exe", ".dmg", ".iso", ".bin",
+]
+
+/**
+ * Reports whether a URL's path ends in one of the NON_HTML_EXTENSIONS.
+ *
+ * Only the pathname is inspected (case-insensitively); the query string and
+ * fragment are ignored. A string that cannot be parsed as a URL yields false.
+ *
+ * @param url - Absolute URL to inspect.
+ * @returns true when the path has a known non-HTML extension, else false.
+ */
+function isNonHtmlUrl(url: string): boolean {
+	let path: string
+	try {
+		path = new URL(url).pathname.toLowerCase()
+	} catch {
+		// Unparseable input is not classified as a non-HTML resource.
+		return false
+	}
+
+	for (const extension of NON_HTML_EXTENSIONS) {
+		if (path.endsWith(extension)) {
+			return true
+		}
+	}
+	return false
+}
+
+/**
+ * Pulls a usable image URL out of a loosely-typed `image` value.
+ *
+ * Accepted shapes:
+ * - a non-empty string, returned as-is;
+ * - an object with a non-empty string `url` property;
+ * - an array, in which case only its first element is examined using the
+ *   two rules above.
+ *
+ * Anything else yields `undefined`, so "no image" always has a single
+ * representation. A missing or non-string `url` is never stringified into
+ * values such as "undefined" or "[object Object]".
+ *
+ * @param image - Untrusted value of unknown shape.
+ * @returns The image URL, or `undefined` when none can be extracted.
+ */
+function extractImageUrl(image: unknown): string | undefined {
+	// For arrays only the first entry is considered.
+	const candidate: unknown = Array.isArray(image) ? image[0] : image
+
+	if (typeof candidate === "string") {
+		return candidate !== "" ? candidate : undefined
+	}
+
+	if (typeof candidate === "object" && candidate !== null) {
+		const url = (candidate as { url?: unknown }).url
+		if (typeof url === "string" && url !== "") {
+			return url
+		}
+	}
+
+	return undefined
+}
+
function extractMetaTag(html: string, patterns: RegExp[]): string {
for (const pattern of patterns) {
const match = html.match(pattern)
@@ -101,6 +165,19 @@ export async function GET(request: Request) {
)
}
+ // Skip OG scraping for non-HTML files (PDFs, images, etc.)
+ if (isNonHtmlUrl(trimmedUrl)) {
+ return Response.json(
+ { title: "", description: "" },
+ {
+ headers: {
+ "Cache-Control":
+ "public, s-maxage=3600, stale-while-revalidate=86400",
+ },
+ },
+ )
+ }
+
const controller = new AbortController()
const timeoutId = setTimeout(() => controller.abort(), 8000)