summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- apps/web/lib/sanitize.test.ts 46
-rw-r--r-- apps/web/lib/sanitize.ts 15
-rw-r--r-- services/worker/internal/parser/parser.go 33
3 files changed, 92 insertions, 2 deletions
diff --git a/apps/web/lib/sanitize.test.ts b/apps/web/lib/sanitize.test.ts
index 266b8af..f47db80 100644
--- a/apps/web/lib/sanitize.test.ts
+++ b/apps/web/lib/sanitize.test.ts
@@ -44,4 +44,50 @@ describe("sanitizeEntryContent", () => {
const input = "<pre><code>const x = 1</code></pre>"
expect(sanitizeEntryContent(input)).toBe(input)
})
+
+ it("strips 1x1 tracking pixel images", () => {
+ const input = '<p>content</p><img src="https://tracker.example.com/pixel.gif" width="1" height="1">'
+ expect(sanitizeEntryContent(input)).toBe("<p>content</p>")
+ })
+
+ it("strips images with zero dimensions", () => {
+ const input = '<img src="https://tracker.example.com/pixel.gif" width="0" height="0">'
+ expect(sanitizeEntryContent(input)).toBe("")
+ })
+
+ it("strips images where only width is a tracking dimension", () => {
+ const input = '<img src="https://tracker.example.com/pixel.gif" width="1">'
+ expect(sanitizeEntryContent(input)).toBe("")
+ })
+
+ it("strips images where only height is a tracking dimension", () => {
+ const input = '<img src="https://tracker.example.com/pixel.gif" height="2">'
+ expect(sanitizeEntryContent(input)).toBe("")
+ })
+
+ it("preserves normal-sized images", () => {
+ const input = '<img src="https://example.com/photo.jpg" width="800" height="600">'
+ const result = sanitizeEntryContent(input)
+ expect(result).toContain("src=")
+ expect(result).toContain("width=")
+ expect(result).toContain("height=")
+ })
+
+ it("preserves images without dimension attributes", () => {
+ const input = '<img src="https://example.com/photo.jpg" alt="a photo">'
+ const result = sanitizeEntryContent(input)
+ expect(result).toContain("src=")
+ expect(result).toContain("alt=")
+ })
+
+ it("strips 3x3 tracking pixel at threshold boundary", () => {
+ const input = '<img src="https://tracker.example.com/pixel.gif" width="3" height="3">'
+ expect(sanitizeEntryContent(input)).toBe("")
+ })
+
+ it("preserves images just above tracking pixel threshold", () => {
+ const input = '<img src="https://example.com/icon.png" width="4" height="4">'
+ const result = sanitizeEntryContent(input)
+ expect(result).toContain("src=")
+ })
})
diff --git a/apps/web/lib/sanitize.ts b/apps/web/lib/sanitize.ts
index b63cee1..3a85016 100644
--- a/apps/web/lib/sanitize.ts
+++ b/apps/web/lib/sanitize.ts
@@ -1,5 +1,19 @@
import sanitizeHtml from "sanitize-html"
+const TRACKING_PIXEL_DIMENSION_THRESHOLD = 3
+
+function isTrackingPixel(tagName: string, attributes: Record<string, string>): boolean { // Reports whether an img tag declares a width or height at or below TRACKING_PIXEL_DIMENSION_THRESHOLD.
+ if (tagName !== "img") return false // every non-img tag is kept
+ // Read the declared dimensions as base-10 integers; a missing or non-numeric attribute yields NaN, which the isNaN guards below skip.
+ const width = parseInt(attributes.width, 10)
+ const height = parseInt(attributes.height, 10)
+ // Either dimension on its own is enough to match; zero and negative values also satisfy the <= comparison.
+ if (!isNaN(width) && width <= TRACKING_PIXEL_DIMENSION_THRESHOLD) return true
+ if (!isNaN(height) && height <= TRACKING_PIXEL_DIMENSION_THRESHOLD) return true
+ // NOTE(review): an image with one tiny and one large declared dimension (e.g. width 1, height 600) has already matched above and is removed -- confirm that is intended.
+ return false
+}
+
const SANITIZE_OPTIONS: sanitizeHtml.IOptions = {
allowedTags: [
"h1",
@@ -36,6 +50,7 @@ const SANITIZE_OPTIONS: sanitizeHtml.IOptions = {
img: ["src", "alt", "title", "width", "height"],
},
allowedSchemes: ["http", "https"],
+ exclusiveFilter: (frame) => isTrackingPixel(frame.tag, frame.attribs),
}
export function sanitizeEntryContent(htmlContent: string): string {
diff --git a/services/worker/internal/parser/parser.go b/services/worker/internal/parser/parser.go
index 203a943..b5307e8 100644
--- a/services/worker/internal/parser/parser.go
+++ b/services/worker/internal/parser/parser.go
@@ -174,13 +174,42 @@ func resolveGloballyUniqueIdentifier(feedItem *gofeed.Item) string {
return fmt.Sprintf("sha256:%x", hashBytes)
}
+func stripTrackingParameters(rawURL string) string { // Returns rawURL trimmed of surrounding whitespace, with query parameters flagged in trackingQueryParameters removed.
+ trimmedURL := strings.TrimSpace(rawURL)
+ // Only URLs starting with a lower-case http or https scheme prefix are rewritten; anything else is returned trimmed but otherwise untouched. NOTE(review): the prefix match is case-sensitive, so an upper-case scheme skips stripping -- confirm this is intended.
+ if !strings.HasPrefix(trimmedURL, "http://") && !strings.HasPrefix(trimmedURL, "https://") {
+ return trimmedURL
+ }
+
+ parsedURL, parseError := url.Parse(trimmedURL)
+
+ if parseError != nil {
+ return trimmedURL // unparseable input is passed through instead of surfacing an error
+ }
+ // Query() decodes RawQuery into url.Values; per the net/url documentation, malformed pairs are silently discarded.
+ queryParameters := parsedURL.Query()
+ filteredParameters := url.Values{}
+ // Keep every parameter whose lower-cased name is not flagged; kept entries retain their original name casing and all values.
+ for parameterName, parameterValues := range queryParameters {
+ loweredName := strings.ToLower(parameterName)
+ // trackingQueryParameters is defined outside this block -- presumably a map[string]bool of lower-case names; verify against its declaration.
+ if !trackingQueryParameters[loweredName] {
+ filteredParameters[parameterName] = parameterValues
+ }
+ }
+ // The query string is rebuilt unconditionally, even when nothing was removed. canonicalizeQueryString is defined outside this block; its ordering and encoding rules are not visible here.
+ parsedURL.RawQuery = canonicalizeQueryString(filteredParameters)
+
+ return parsedURL.String()
+}
+
func resolveEntryURL(feedItem *gofeed.Item) string {
if feedItem.Link != "" {
- return feedItem.Link
+ return stripTrackingParameters(feedItem.Link)
}
if feedItem.GUID != "" && strings.HasPrefix(feedItem.GUID, "http") {
- return feedItem.GUID
+ return stripTrackingParameters(feedItem.GUID)
}
return ""