diff options
| -rw-r--r-- | apps/web/lib/sanitize.test.ts | 46 | ||||
| -rw-r--r-- | apps/web/lib/sanitize.ts | 15 | ||||
| -rw-r--r-- | services/worker/internal/parser/parser.go | 33 |
3 files changed, 92 insertions, 2 deletions
diff --git a/apps/web/lib/sanitize.test.ts b/apps/web/lib/sanitize.test.ts index 266b8af..f47db80 100644 --- a/apps/web/lib/sanitize.test.ts +++ b/apps/web/lib/sanitize.test.ts @@ -44,4 +44,50 @@ describe("sanitizeEntryContent", () => { const input = "<pre><code>const x = 1</code></pre>" expect(sanitizeEntryContent(input)).toBe(input) }) + + it("strips 1x1 tracking pixel images", () => { + const input = '<p>content</p><img src="https://tracker.example.com/pixel.gif" width="1" height="1">' + expect(sanitizeEntryContent(input)).toBe("<p>content</p>") + }) + + it("strips images with zero dimensions", () => { + const input = '<img src="https://tracker.example.com/pixel.gif" width="0" height="0">' + expect(sanitizeEntryContent(input)).toBe("") + }) + + it("strips images where only width is a tracking dimension", () => { + const input = '<img src="https://tracker.example.com/pixel.gif" width="1">' + expect(sanitizeEntryContent(input)).toBe("") + }) + + it("strips images where only height is a tracking dimension", () => { + const input = '<img src="https://tracker.example.com/pixel.gif" height="2">' + expect(sanitizeEntryContent(input)).toBe("") + }) + + it("preserves normal-sized images", () => { + const input = '<img src="https://example.com/photo.jpg" width="800" height="600">' + const result = sanitizeEntryContent(input) + expect(result).toContain("src=") + expect(result).toContain("width=") + expect(result).toContain("height=") + }) + + it("preserves images without dimension attributes", () => { + const input = '<img src="https://example.com/photo.jpg" alt="a photo">' + const result = sanitizeEntryContent(input) + expect(result).toContain("src=") + expect(result).toContain("alt=") + }) + + it("strips 3x3 tracking pixel at threshold boundary", () => { + const input = '<img src="https://tracker.example.com/pixel.gif" width="3" height="3">' + expect(sanitizeEntryContent(input)).toBe("") + }) + + it("preserves images just above tracking pixel threshold", () => { + const input = '<img src="https://example.com/icon.png" width="4" height="4">' + const result = sanitizeEntryContent(input) + expect(result).toContain("src=") + }) }) diff --git a/apps/web/lib/sanitize.ts b/apps/web/lib/sanitize.ts index b63cee1..3a85016 100644 --- a/apps/web/lib/sanitize.ts +++ b/apps/web/lib/sanitize.ts @@ -1,5 +1,19 @@ import sanitizeHtml from "sanitize-html" +const TRACKING_PIXEL_DIMENSION_THRESHOLD = 3 + +function isTrackingPixel(tagName: string, attributes: Record<string, string>): boolean { + if (tagName !== "img") return false + + const width = parseInt(attributes.width, 10) + const height = parseInt(attributes.height, 10) + + if (!isNaN(width) && width <= TRACKING_PIXEL_DIMENSION_THRESHOLD) return true + if (!isNaN(height) && height <= TRACKING_PIXEL_DIMENSION_THRESHOLD) return true + + return false +} + const SANITIZE_OPTIONS: sanitizeHtml.IOptions = { allowedTags: [ "h1", @@ -36,6 +50,7 @@ const SANITIZE_OPTIONS: sanitizeHtml.IOptions = { img: ["src", "alt", "title", "width", "height"], }, allowedSchemes: ["http", "https"], + exclusiveFilter: (frame) => isTrackingPixel(frame.tag, frame.attribs), } export function sanitizeEntryContent(htmlContent: string): string { diff --git a/services/worker/internal/parser/parser.go b/services/worker/internal/parser/parser.go index 203a943..b5307e8 100644 --- a/services/worker/internal/parser/parser.go +++ b/services/worker/internal/parser/parser.go @@ -174,13 +174,42 @@ func resolveGloballyUniqueIdentifier(feedItem *gofeed.Item) string { return fmt.Sprintf("sha256:%x", hashBytes) } +func stripTrackingParameters(rawURL string) string { + trimmedURL := strings.TrimSpace(rawURL) + + if !strings.HasPrefix(trimmedURL, "http://") && !strings.HasPrefix(trimmedURL, "https://") { + return trimmedURL + } + + parsedURL, parseError := url.Parse(trimmedURL) + + if parseError != nil { + return trimmedURL + } + + queryParameters := parsedURL.Query() + filteredParameters := url.Values{} + + for parameterName, parameterValues := range queryParameters { + loweredName := strings.ToLower(parameterName) + + if !trackingQueryParameters[loweredName] { + filteredParameters[parameterName] = parameterValues + } + } + + parsedURL.RawQuery = canonicalizeQueryString(filteredParameters) + + return parsedURL.String() +} + func resolveEntryURL(feedItem *gofeed.Item) string { if feedItem.Link != "" { - return feedItem.Link + return stripTrackingParameters(feedItem.Link) } if feedItem.GUID != "" && strings.HasPrefix(feedItem.GUID, "http") { - return feedItem.GUID + return stripTrackingParameters(feedItem.GUID) } return "" |