diff options
| author | Fuwn <[email protected]> | 2026-02-12 00:49:03 -0800 |
|---|---|---|
| committer | Fuwn <[email protected]> | 2026-02-12 00:49:42 -0800 |
| commit | 2911927a2d9fdd5616c2eda5643143f601068888 (patch) | |
| tree | 79726e2f2babc4a1da58c30b59d22981fbbdfe26 | |
| parent | Redump latest Supabase schema (diff) | |
| download | asa.news-2911927a2d9fdd5616c2eda5643143f601068888.tar.xz asa.news-2911927a2d9fdd5616c2eda5643143f601068888.zip | |
fix: prevent read entries from reverting to unread on re-fetch

Root cause: cleanup_stale_entries deleted read-but-unsaved entries from
active feeds, then the Go worker re-inserted them with new UUIDs,
orphaning the user_entry_states rows and making entries appear unread.

- cleanup_stale_entries: skip feeds with active subscribers and preserve
  entries that have been read (not just saved)
- Go parser: normalize GUIDs by trimming whitespace and stripping
  tracking query parameters (and fragments) from URL-based identifiers
- Go writer: preserve the original published_at on upsert instead of
  overwriting it, preventing old entries from jumping to the top of the
  timeline
- get_unread_counts: apply the same time boundary as get_timeline so
  ancient re-inserted entries don't inflate counts
- Realtime listener: ignore INSERT events for entries published more
  than 48h ago to suppress misleading "new entries" notifications from
  re-inserts
| -rw-r--r-- | apps/web/lib/hooks/use-realtime-entries.ts | 14 | ||||
| -rw-r--r-- | services/worker/internal/fetcher/errors_test.go | 2 | ||||
| -rw-r--r-- | services/worker/internal/parser/parser.go | 67 | ||||
| -rw-r--r-- | services/worker/internal/writer/writer.go | 2 | ||||
| -rw-r--r-- | supabase/schema.sql | 14 |
5 files changed, 89 insertions, 10 deletions
diff --git a/apps/web/lib/hooks/use-realtime-entries.ts b/apps/web/lib/hooks/use-realtime-entries.ts index 22551f9..63d0eed 100644 --- a/apps/web/lib/hooks/use-realtime-entries.ts +++ b/apps/web/lib/hooks/use-realtime-entries.ts @@ -9,6 +9,7 @@ import { useNotificationStore } from "@/lib/stores/notification-store" import { useUserInterfaceStore } from "@/lib/stores/user-interface-store" const DEBOUNCE_MILLISECONDS = 3000 +const STALE_ENTRY_THRESHOLD_HOURS = 48 export function useRealtimeEntries() { const queryClient = useQueryClient() @@ -66,7 +67,18 @@ export function useRealtimeEntries() { schema: "public", table: "entries", }, - () => { + (payload) => { + const publishedAt = payload.new?.published_at + if (publishedAt) { + const entryAge = + Date.now() - new Date(publishedAt).getTime() + const thresholdMilliseconds = + STALE_ENTRY_THRESHOLD_HOURS * 60 * 60 * 1000 + if (entryAge > thresholdMilliseconds) { + return + } + } + pendingCountReference.current++ if (debounceTimerReference.current) { diff --git a/services/worker/internal/fetcher/errors_test.go b/services/worker/internal/fetcher/errors_test.go index e81251b..5c425aa 100644 --- a/services/worker/internal/fetcher/errors_test.go +++ b/services/worker/internal/fetcher/errors_test.go @@ -107,7 +107,9 @@ func TestClassifyTimeoutError(test *testing.T) { type timeoutErr struct{} func (timeoutError *timeoutErr) Error() string { return "connection timed out" } + func (timeoutError *timeoutErr) Timeout() bool { return true } + func (timeoutError *timeoutErr) Temporary() bool { return true } func TestClassifyNetworkOpError(test *testing.T) { diff --git a/services/worker/internal/parser/parser.go b/services/worker/internal/parser/parser.go index 32611e7..203a943 100644 --- a/services/worker/internal/parser/parser.go +++ b/services/worker/internal/parser/parser.go @@ -3,12 +3,13 @@ package parser import ( "crypto/sha256" "fmt" - "github.com/Fuwn/asa-news/internal/model" - "github.com/mmcdole/gofeed" + "net/url" 
"strconv" "strings" "time" "unicode/utf8" + "github.com/Fuwn/asa-news/internal/model" + "github.com/mmcdole/gofeed" ) type Parser struct { @@ -102,16 +103,72 @@ func stringPointerOrNil(value string) *string { return &value } +var trackingQueryParameters = map[string]bool{ + "utm_source": true, + "utm_medium": true, + "utm_campaign": true, + "utm_term": true, + "utm_content": true, + "utm_id": true, + "ref": true, + "fbclid": true, + "gclid": true, + "mc_cid": true, + "mc_eid": true, + "_hsenc": true, + "_hsmi": true, + "source": true, + "dest": true, +} + +func normalizeGloballyUniqueIdentifier(rawIdentifier string) string { + normalized := strings.TrimSpace(rawIdentifier) + + if !strings.HasPrefix(normalized, "http://") && !strings.HasPrefix(normalized, "https://") { + return normalized + } + + parsedURL, parseError := url.Parse(normalized) + + if parseError != nil { + return normalized + } + + queryParameters := parsedURL.Query() + filteredParameters := url.Values{} + + for parameterName, parameterValues := range queryParameters { + loweredName := strings.ToLower(parameterName) + + if !trackingQueryParameters[loweredName] { + filteredParameters[parameterName] = parameterValues + } + } + + parsedURL.RawQuery = canonicalizeQueryString(filteredParameters) + parsedURL.Fragment = "" + + return parsedURL.String() +} + +func canonicalizeQueryString(parameters url.Values) string { + if len(parameters) == 0 { + return "" + } + + return parameters.Encode() +} + func resolveGloballyUniqueIdentifier(feedItem *gofeed.Item) string { if feedItem.GUID != "" { - return feedItem.GUID + return normalizeGloballyUniqueIdentifier(feedItem.GUID) } if feedItem.Link != "" { - return feedItem.Link + return normalizeGloballyUniqueIdentifier(feedItem.Link) } - hashInput := feedItem.Title + feedItem.Description + hashInput := strings.TrimSpace(feedItem.Title) + strings.TrimSpace(feedItem.Description) hashBytes := sha256.Sum256([]byte(hashInput)) return fmt.Sprintf("sha256:%x", hashBytes) 
diff --git a/services/worker/internal/writer/writer.go b/services/worker/internal/writer/writer.go index 543b6e6..e5c7153 100644 --- a/services/worker/internal/writer/writer.go +++ b/services/worker/internal/writer/writer.go @@ -100,7 +100,7 @@ func (feedWriter *Writer) WriteEntries(writeContext context.Context, feedEntries ELSE EXCLUDED.content_text END, image_url = EXCLUDED.image_url, - published_at = EXCLUDED.published_at, + published_at = COALESCE(entries.published_at, EXCLUDED.published_at), word_count = EXCLUDED.word_count, enclosure_url = EXCLUDED.enclosure_url, enclosure_type = EXCLUDED.enclosure_type, diff --git a/supabase/schema.sql b/supabase/schema.sql index 7d2fa6c..388154a 100644 --- a/supabase/schema.sql +++ b/supabase/schema.sql @@ -2,7 +2,7 @@ -- PostgreSQL database dump -- --- \restrict WfBftBa2FBaIUQXqx7Cp1KF3Bwxh3O5RTj1axY30px9LdUnvAINNXEbxzkAHb3i +-- \restrict wNSUmyvUwXt9aJkcfw8gLKUVSQuRUSZfaqWMfy8PywitG4rFxIkfWY9tkxN08cC -- Dumped from database version 17.6 -- Dumped by pg_dump version 17.6 @@ -463,7 +463,10 @@ BEGIN DELETE FROM public.entries WHERE published_at < NOW() - INTERVAL '90 days' AND id NOT IN ( - SELECT entry_id FROM public.user_entry_states WHERE saved = true + SELECT entry_id FROM public.user_entry_states WHERE saved = true OR read = true + ) + AND feed_id NOT IN ( + SELECT id FROM public.feeds WHERE subscriber_count > 0 ); GET DIAGNOSTICS deleted_count = ROW_COUNT; RETURN deleted_count; @@ -824,6 +827,11 @@ CREATE OR REPLACE FUNCTION "public"."get_unread_counts"() RETURNS TABLE("feed_id AND ues.user_id = auth.uid() AND ues.read = true ) + AND (e.owner_id IS NULL OR e.owner_id = auth.uid()) + AND ( + (SELECT tier FROM user_profiles WHERE id = auth.uid()) IN ('pro', 'developer') + OR e.published_at >= now() - interval '14 days' + ) GROUP BY e.feed_id; $$; @@ -3728,5 +3736,5 @@ ALTER DEFAULT PRIVILEGES FOR ROLE "postgres" IN SCHEMA "public" GRANT ALL ON TAB -- PostgreSQL database dump complete -- --- \unrestrict 
WfBftBa2FBaIUQXqx7Cp1KF3Bwxh3O5RTj1axY30px9LdUnvAINNXEbxzkAHb3i +-- \unrestrict wNSUmyvUwXt9aJkcfw8gLKUVSQuRUSZfaqWMfy8PywitG4rFxIkfWY9tkxN08cC |