/** Open Graph fields: a title, a description and an optional image URL. */
interface OGResponse {
  title: string
  description: string
  image?: string
}

/**
 * Reports whether `urlString` parses as an absolute URL whose scheme is
 * `http:` or `https:`.
 *
 * @param urlString - Candidate URL text.
 * @returns `true` only for parseable http(s) URLs; never throws.
 */
function isValidUrl(urlString: string): boolean {
  let parsed: URL
  try {
    parsed = new URL(urlString)
  } catch {
    return false
  }
  return parsed.protocol === "http:" || parsed.protocol === "https:"
}

/**
 * Reports whether `hostname` points at the local machine or a private /
 * link-local network, so that it must not be fetched on behalf of a caller.
 *
 * Accepts both bare hosts and the form produced by `URL.hostname`, which wraps
 * IPv6 literals in square brackets (e.g. `[::1]`).
 *
 * This is a purely lexical check: it does not resolve DNS, so a public name
 * that resolves to a private address is not detected here.
 *
 * @param hostname - Host part of a URL, with or without IPv6 brackets.
 * @returns `true` when the host is loopback, unspecified, private or link-local.
 */
function isPrivateHost(hostname: string): boolean {
  // Normalise: case-fold, drop IPv6 brackets and a trailing root dot.
  const host = hostname
    .trim()
    .toLowerCase()
    .replace(/^\[|\]$/g, "")
    .replace(/\.$/, "")

  if (host === "localhost" || host.endsWith(".localhost")) {
    return true
  }

  // Only IPv6 literals contain ':' (the port is not part of a hostname).
  if (host.includes(":")) {
    // Loopback and the unspecified address.
    if (host === "::1" || host === "::") {
      return true
    }

    // IPv4-mapped IPv6 in dotted form, e.g. ::ffff:127.0.0.1
    const mappedDotted = /^::ffff:(\d{1,3}(?:\.\d{1,3}){3})$/.exec(host)
    if (mappedDotted) {
      return isPrivateHost(mappedDotted[1] ?? "")
    }

    // IPv4-mapped IPv6 as serialised by the URL parser, e.g. ::ffff:7f00:1
    const mappedHex = /^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(host)
    if (mappedHex) {
      const hi = Number.parseInt(mappedHex[1] ?? "", 16)
      const lo = Number.parseInt(mappedHex[2] ?? "", 16)
      return isPrivateHost(
        `${hi >> 8}.${hi & 0xff}.${lo >> 8}.${lo & 0xff}`,
      )
    }

    // fc00::/7 (unique local) and fe80::/10 (link-local).
    return /^f[cd][0-9a-f]{2}:/.test(host) || /^fe[89ab][0-9a-f]:/.test(host)
  }

  const privateIpPatterns = [
    /^0\./, // "this network", includes 0.0.0.0
    /^127\./, // loopback
    /^10\./, // RFC 1918
    /^172\.(1[6-9]|2[0-9]|3[01])\./, // RFC 1918
    /^192\.168\./, // RFC 1918
    /^169\.254\./, // link-local, incl. cloud metadata endpoints
  ]
  return privateIpPatterns.some((pattern) => pattern.test(host))
}

// File extensions that are not HTML and can't be scraped for OG data
const NON_HTML_EXTENSIONS = [
  ".pdf",
  ".doc",
  ".docx",
  ".xls",
  ".xlsx",
  ".ppt",
  ".pptx",
  ".zip",
  ".rar",
  ".7z",
  ".tar",
  ".gz",
  ".mp3",
  ".mp4",
  ".avi",
  ".mov",
  ".wmv",
  ".flv",
  ".webm",
  ".wav",
  ".ogg",
  ".jpg",
  ".jpeg",
  ".png",
  ".gif",
  ".webp",
  ".svg",
  ".ico",
  ".bmp",
  ".tiff",
  ".exe",
  ".dmg",
  ".iso",
  ".bin",
]

/**
 * Reports whether the path of `url` ends with one of `NON_HTML_EXTENSIONS`
 * (compared case-insensitively; query string and fragment are ignored).
 *
 * @param url - Absolute URL text.
 * @returns `true` for a known non-HTML extension; `false` otherwise, including
 *   when `url` cannot be parsed.
 */
function isNonHtmlUrl(url: string): boolean {
  let pathname: string
  try {
    pathname = new URL(url).pathname.toLowerCase()
  } catch {
    return false
  }
  return NON_HTML_EXTENSIONS.some((ext) => pathname.endsWith(ext))
}

/**
 * Pulls an image URL out of a loosely-typed image value.
 *
 * Supported shapes: a string, an object with a string `url` property, or an
 * array whose first element is either of those.
 *
 * @param image - Value of unknown shape.
 * @returns The URL string, or `undefined` when none can be extracted (the
 *   previous fall-through value `""` and stringified non-string `url` values
 *   are no longer produced).
 */
function extractImageUrl(image: unknown): string | undefined {
  // For arrays only the first entry is considered.
  const candidate: unknown = Array.isArray(image) ? image[0] : image

  if (typeof candidate === "string") {
    return candidate || undefined
  }
  if (candidate && typeof candidate === "object" && "url" in candidate) {
    const { url } = candidate
    return typeof url === "string" && url ? url : undefined
  }
  return undefined
}

/**
 * Returns the first capture group of the first pattern in `patterns` that
 * matches `html`, with basic HTML entities decoded and surrounding whitespace
 * trimmed.
 *
 * @param html - Markup to search.
 * @param patterns - Regular expressions tried in order; each must expose the
 *   wanted text as capture group 1.
 * @returns The decoded text, or `""` when no pattern yields a non-empty group.
 */
function extractMetaTag(html: string, patterns: RegExp[]): string {
  for (const pattern of patterns) {
    const captured = html.match(pattern)?.[1]
    if (captured) {
      return (
        captured
          .replace(/&lt;/g, "<")
          .replace(/&gt;/g, ">")
          .replace(/&quot;/g, '"')
          .replace(/&(?:#0*39|#x0*27|apos);/gi, "'")
          // Decode '&amp;' last so that "&amp;lt;" becomes "&lt;", not "<".
          .replace(/&amp;/g, "&")
          .trim()
      )
    }
  }
  return ""
}

/**
 * Turns `imageUrl` into an absolute URL: an already-absolute value is
 * returned normalised, anything else is resolved against `baseUrl`.
 *
 * @param imageUrl - Absolute or relative image URL; may be missing.
 * @param baseUrl - URL that relative image URLs are resolved against.
 * @returns The absolute URL, or `undefined` when `imageUrl` is empty or
 *   cannot be resolved.
 */
function resolveImageUrl(
  imageUrl: string | undefined,
  baseUrl: string,
): string | undefined {
  if (!imageUrl) return undefined
try { const url = new URL(imageUrl) return url.href } catch { try { const base = new URL(baseUrl) return new URL(imageUrl, base.href).href } catch { return undefined } } } export async function GET(request: Request) { try { const { searchParams } = new URL(request.url) const url = searchParams.get("url") if (!url || !url.trim()) { return Response.json( { error: "Missing or invalid url parameter" }, { status: 400 }, ) } const trimmedUrl = url.trim() if (!isValidUrl(trimmedUrl)) { return Response.json( { error: "Invalid URL. Must be http:// or https://" }, { status: 400 }, ) } const urlObj = new URL(trimmedUrl) if (isPrivateHost(urlObj.hostname)) { return Response.json( { error: "Private/localhost URLs are not allowed" }, { status: 400 }, ) } // Skip OG scraping for non-HTML files (PDFs, images, etc.) if (isNonHtmlUrl(trimmedUrl)) { return Response.json( { title: "", description: "" }, { headers: { "Cache-Control": "public, s-maxage=3600, stale-while-revalidate=86400", }, }, ) } const controller = new AbortController() const timeoutId = setTimeout(() => controller.abort(), 8000) const response = await fetch(trimmedUrl, { signal: controller.signal, headers: { "User-Agent": "Mozilla/5.0 (compatible; SuperMemory/1.0; +https://supermemory.ai)", }, }) clearTimeout(timeoutId) if (!response.ok) { return Response.json( { error: "Failed to fetch URL" }, { status: response.status }, ) } const html = await response.text() const titlePatterns = [ /([^<]+)<\/title>/i, ] const descriptionPatterns = [ /