/**
 * Open Graph style metadata for a single page.
 *
 * NOTE(review): presumably the JSON body returned by the GET handler in this
 * file (its non-HTML short-circuit responds with `{ title: "", description: "" }`,
 * which fits this shape) — confirm against the handler's success path.
 */
interface OGResponse {
// Title text; the handler uses "" when nothing could be scraped.
title: string
// Description text; the handler uses "" when nothing could be scraped.
description: string
// Optional image URL; omitted when no image is available.
image?: string
}
/**
 * Reports whether `urlString` parses as an absolute URL that uses the
 * `http:` or `https:` scheme.
 *
 * @param urlString - Candidate URL text.
 * @returns `true` for a parseable http(s) URL; `false` for any other scheme
 *   or for text the `URL` constructor rejects.
 */
function isValidUrl(urlString: string): boolean {
  let parsed: URL
  try {
    parsed = new URL(urlString)
  } catch {
    // The URL constructor throws on unparseable input.
    return false
  }
  return ["http:", "https:"].includes(parsed.protocol)
}
/**
 * Reports whether `hostname` names a loopback, private, link-local or
 * otherwise internal destination that a server-side fetch must not reach
 * (SSRF guard).
 *
 * Accepts the value of `URL.hostname`, which serialises IPv6 literals with
 * surrounding brackets (e.g. `[::1]`); bare IPv6 text is accepted as well.
 *
 * Blocked:
 * - `localhost`, `localhost.` and any `*.localhost` name
 * - IPv4 `0.0.0.0/8`, `127.0.0.0/8`, `10.0.0.0/8`, `169.254.0.0/16`,
 *   `172.16.0.0/12`, `192.168.0.0/16`
 * - IPv6 `::`, `::1`, unique-local `fc00::/7`, link-local `fe80::/10`, and
 *   IPv4-mapped addresses (`::ffff:a.b.c.d`) whose embedded IPv4 is blocked
 *
 * IPv4 ranges are matched by textual prefix, so a DNS name that merely
 * starts with such a prefix (e.g. `10.example.com`) is rejected too; this
 * errs on the side of blocking.
 *
 * This is a purely lexical check: it cannot detect a public DNS name that
 * resolves to an internal address.
 *
 * @param hostname - Host portion of a URL, without port.
 * @returns `true` when the host must be rejected.
 */
function isPrivateHost(hostname: string): boolean {
  let host = hostname.toLowerCase()
  // URL.hostname wraps IPv6 literals in brackets; compare against the bare address.
  if (host.startsWith("[") && host.endsWith("]")) {
    host = host.slice(1, -1)
  }

  // A single trailing dot is the DNS root label and names the same host.
  const dnsName = host.endsWith(".") ? host.slice(0, -1) : host
  if (dnsName === "localhost" || dnsName.endsWith(".localhost")) {
    return true
  }

  // Only IPv6 literals can contain ":" in a URL hostname.
  if (host.includes(":")) {
    if (host === "::1" || host === "::") {
      return true
    }
    // fc00::/7 (unique local) and fe80::/10 (link local).
    if (/^f[cd][0-9a-f]{2}:/.test(host) || /^fe[89ab][0-9a-f]:/.test(host)) {
      return true
    }
    // IPv4-mapped IPv6, in dotted form or in the two-hextet form that the
    // URL parser normalises to (e.g. ::ffff:7f00:1 for 127.0.0.1).
    const mapped =
      /^::ffff:(?:(\d{1,3}(?:\.\d{1,3}){3})|([0-9a-f]{1,4}):([0-9a-f]{1,4}))$/.exec(
        host,
      )
    if (mapped) {
      const [, dotted, highHex, lowHex] = mapped
      if (dotted !== undefined) {
        return isPrivateHost(dotted)
      }
      if (highHex !== undefined && lowHex !== undefined) {
        const high = parseInt(highHex, 16)
        const low = parseInt(lowHex, 16)
        return isPrivateHost(
          `${high >> 8}.${high & 0xff}.${low >> 8}.${low & 0xff}`,
        )
      }
    }
  }

  const privateIpv4Prefixes = [
    /^0\./, // "this network", includes 0.0.0.0
    /^127\./, // loopback
    /^10\./, // RFC 1918
    /^169\.254\./, // link-local, includes cloud metadata endpoints
    /^172\.(1[6-9]|2[0-9]|3[01])\./, // RFC 1918
    /^192\.168\./, // RFC 1918
  ]
  return privateIpv4Prefixes.some((prefix) => prefix.test(host))
}
/**
 * Lower-case file extensions (leading dot included) of resources that are
 * not HTML documents and therefore cannot be scraped for OG data.
 */
const NON_HTML_EXTENSIONS = [
  // Office documents
  ".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",
  // Archives
  ".zip", ".rar", ".7z", ".tar", ".gz",
  // Audio and video
  ".mp3", ".mp4", ".avi", ".mov", ".wmv", ".flv", ".webm", ".wav", ".ogg",
  // Images
  ".jpg", ".jpeg", ".png", ".gif", ".webp", ".svg", ".ico", ".bmp", ".tiff",
  // Executables and disk images
  ".exe", ".dmg", ".iso", ".bin",
]
/**
 * Reports whether the path of `url` ends with one of the extensions listed
 * in `NON_HTML_EXTENSIONS`. The comparison is case-insensitive and looks
 * only at the pathname, so query strings and fragments are ignored.
 *
 * @param url - Absolute URL text.
 * @returns `true` when the path has a known non-HTML extension; `false`
 *   otherwise, including when `url` cannot be parsed.
 */
function isNonHtmlUrl(url: string): boolean {
  let path: string
  try {
    path = new URL(url).pathname.toLowerCase()
  } catch {
    // Unparseable input is treated as "not a known non-HTML file".
    return false
  }
  for (const extension of NON_HTML_EXTENSIONS) {
    if (path.endsWith(extension)) {
      return true
    }
  }
  return false
}
/**
 * Pulls an image URL out of a loosely-typed image value.
 *
 * Accepted shapes:
 * - a non-empty string, returned as is
 * - an object with a string `url` property
 * - an array whose first element is either of the above
 *
 * Anything else yields `undefined`, so callers have a single "no image"
 * sentinel. A missing or non-string `url` is never stringified: it is
 * treated as "no image" rather than becoming text such as `"undefined"`.
 *
 * @param image - Value of unknown shape that may describe an image.
 * @returns The image URL text, or `undefined` when none can be extracted.
 */
function extractImageUrl(image: unknown): string | undefined {
  if (!image) return undefined
  if (typeof image === "string") return image

  // For arrays only the first entry is considered.
  const candidate: unknown = Array.isArray(image) ? image[0] : image

  if (typeof candidate === "string") {
    return candidate || undefined
  }
  if (candidate && typeof candidate === "object" && "url" in candidate) {
    const url: unknown = (candidate as { url?: unknown }).url
    if (typeof url === "string" && url) {
      return url
    }
  }
  return undefined
}
/**
 * Returns the first capture group of the first pattern in `patterns` that
 * matches `html`, with HTML character references decoded and surrounding
 * whitespace trimmed.
 *
 * Decoding is done in a single pass, so an escaped reference such as
 * `&amp;lt;` becomes the text `&lt;` and is not decoded a second time.
 * Supported references: `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`, and
 * decimal / hexadecimal numeric references (`&#39;`, `&#x27;`). Unknown or
 * out-of-range references are left untouched.
 *
 * @param html - Markup to search.
 * @param patterns - Regular expressions tried in order; each should expose
 *   the wanted text as capture group 1 and should not use the `g` flag
 *   (with `g`, `String.prototype.match` returns whole matches, not groups).
 * @returns The decoded, trimmed text, or `""` when no pattern yields a
 *   non-empty capture.
 */
function extractMetaTag(html: string, patterns: RegExp[]): string {
  // A Map (not an object literal) so names like "constructor" cannot hit
  // inherited properties.
  const namedEntities = new Map<string, string>([
    ["amp", "&"],
    ["lt", "<"],
    ["gt", ">"],
    ["quot", '"'],
    ["apos", "'"],
  ])

  const decodeEntity = (entity: string, body: string): string => {
    if (body.startsWith("#")) {
      const codePoint = /^#x/i.test(body)
        ? parseInt(body.slice(2), 16)
        : parseInt(body.slice(1), 10)
      // String.fromCodePoint throws outside the Unicode range; keep the
      // original text for anything it cannot represent.
      const isValid =
        Number.isInteger(codePoint) && codePoint > 0 && codePoint <= 0x10ffff
      return isValid ? String.fromCodePoint(codePoint) : entity
    }
    return namedEntities.get(body) ?? entity
  }

  for (const pattern of patterns) {
    const captured = html.match(pattern)?.[1]
    if (captured) {
      return captured
        .replace(/&(#x[0-9a-f]+|#\d+|[a-z][a-z0-9]*);/gi, decodeEntity)
        .trim()
    }
  }
  return ""
}
/**
 * Turns an image reference into an absolute URL string.
 *
 * An already-absolute `imageUrl` is returned in normalised form regardless
 * of `baseUrl`. Otherwise it is resolved relative to `baseUrl`.
 *
 * @param imageUrl - Absolute or relative image reference; may be missing.
 * @param baseUrl - URL of the page the reference was found on.
 * @returns The absolute URL, or `undefined` when `imageUrl` is empty or
 *   missing, or when it is relative and cannot be resolved against `baseUrl`.
 */
function resolveImageUrl(
  imageUrl: string | undefined,
  baseUrl: string,
): string | undefined {
  if (!imageUrl) return undefined

  // Parse helper: yields the normalised href, or undefined if parsing fails.
  const hrefOf = (input: string, base?: string): string | undefined => {
    try {
      return new URL(input, base).href
    } catch {
      return undefined
    }
  }

  const absolute = hrefOf(imageUrl)
  if (absolute !== undefined) return absolute

  const baseHref = hrefOf(baseUrl)
  return baseHref === undefined ? undefined : hrefOf(imageUrl, baseHref)
}
export async function GET(request: Request) {
try {
const { searchParams } = new URL(request.url)
const url = searchParams.get("url")
if (!url || !url.trim()) {
return Response.json(
{ error: "Missing or invalid url parameter" },
{ status: 400 },
)
}
const trimmedUrl = url.trim()
if (!isValidUrl(trimmedUrl)) {
return Response.json(
{ error: "Invalid URL. Must be http:// or https://" },
{ status: 400 },
)
}
const urlObj = new URL(trimmedUrl)
if (isPrivateHost(urlObj.hostname)) {
return Response.json(
{ error: "Private/localhost URLs are not allowed" },
{ status: 400 },
)
}
// Skip OG scraping for non-HTML files (PDFs, images, etc.)
if (isNonHtmlUrl(trimmedUrl)) {
return Response.json(
{ title: "", description: "" },
{
headers: {
"Cache-Control":
"public, s-maxage=3600, stale-while-revalidate=86400",
},
},
)
}
const controller = new AbortController()
const timeoutId = setTimeout(() => controller.abort(), 8000)
const response = await fetch(trimmedUrl, {
signal: controller.signal,
headers: {
"User-Agent":
"Mozilla/5.0 (compatible; SuperMemory/1.0; +https://supermemory.ai)",
},
})
clearTimeout(timeoutId)
if (!response.ok) {
return Response.json(
{ error: "Failed to fetch URL" },
{ status: response.status },
)
}
const html = await response.text()
const titlePatterns = [
/([^<]+)<\/title>/i,
]
const descriptionPatterns = [
/