summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFuwn <[email protected]>2026-02-12 00:49:03 -0800
committerFuwn <[email protected]>2026-02-12 00:49:42 -0800
commit2911927a2d9fdd5616c2eda5643143f601068888 (patch)
tree79726e2f2babc4a1da58c30b59d22981fbbdfe26
parentRedump latest Supabase schema (diff)
downloadasa.news-2911927a2d9fdd5616c2eda5643143f601068888.tar.xz
asa.news-2911927a2d9fdd5616c2eda5643143f601068888.zip
fix: prevent read entries from reverting to unread on re-fetch
Root cause: cleanup_stale_entries deleted read-but-unsaved entries from active feeds, then the Go worker re-inserted them with new UUIDs, orphaning the user_entry_states rows and making entries appear unread.

- cleanup_stale_entries: skip feeds with active subscribers and preserve entries that have been read (not just saved)
- Go parser: normalize GUIDs by trimming whitespace and stripping tracking query parameters from URL-based identifiers
- Go writer: preserve original published_at on upsert instead of overwriting, preventing old entries from jumping to timeline top
- get_unread_counts: apply same time boundary as get_timeline so ancient re-inserted entries don't inflate counts
- Realtime listener: ignore INSERT events for entries older than 48h to suppress misleading "new entries" notifications from re-inserts
-rw-r--r--apps/web/lib/hooks/use-realtime-entries.ts14
-rw-r--r--services/worker/internal/fetcher/errors_test.go2
-rw-r--r--services/worker/internal/parser/parser.go67
-rw-r--r--services/worker/internal/writer/writer.go2
-rw-r--r--supabase/schema.sql14
5 files changed, 89 insertions, 10 deletions
diff --git a/apps/web/lib/hooks/use-realtime-entries.ts b/apps/web/lib/hooks/use-realtime-entries.ts
index 22551f9..63d0eed 100644
--- a/apps/web/lib/hooks/use-realtime-entries.ts
+++ b/apps/web/lib/hooks/use-realtime-entries.ts
@@ -9,6 +9,7 @@ import { useNotificationStore } from "@/lib/stores/notification-store"
import { useUserInterfaceStore } from "@/lib/stores/user-interface-store"
const DEBOUNCE_MILLISECONDS = 3000
+const STALE_ENTRY_THRESHOLD_HOURS = 48
export function useRealtimeEntries() {
const queryClient = useQueryClient()
@@ -66,7 +67,18 @@ export function useRealtimeEntries() {
schema: "public",
table: "entries",
},
- () => {
+ (payload) => {
+ const publishedAt = payload.new?.published_at
+ if (publishedAt) {
+ const entryAge =
+ Date.now() - new Date(publishedAt).getTime()
+ const thresholdMilliseconds =
+ STALE_ENTRY_THRESHOLD_HOURS * 60 * 60 * 1000
+ if (entryAge > thresholdMilliseconds) {
+ return
+ }
+ }
+
pendingCountReference.current++
if (debounceTimerReference.current) {
diff --git a/services/worker/internal/fetcher/errors_test.go b/services/worker/internal/fetcher/errors_test.go
index e81251b..5c425aa 100644
--- a/services/worker/internal/fetcher/errors_test.go
+++ b/services/worker/internal/fetcher/errors_test.go
@@ -107,7 +107,9 @@ func TestClassifyTimeoutError(test *testing.T) {
type timeoutErr struct{}
func (timeoutError *timeoutErr) Error() string { return "connection timed out" }
+
func (timeoutError *timeoutErr) Timeout() bool { return true }
+
func (timeoutError *timeoutErr) Temporary() bool { return true }
func TestClassifyNetworkOpError(test *testing.T) {
diff --git a/services/worker/internal/parser/parser.go b/services/worker/internal/parser/parser.go
index 32611e7..203a943 100644
--- a/services/worker/internal/parser/parser.go
+++ b/services/worker/internal/parser/parser.go
@@ -3,12 +3,13 @@ package parser
import (
"crypto/sha256"
"fmt"
- "github.com/Fuwn/asa-news/internal/model"
- "github.com/mmcdole/gofeed"
+ "net/url"
"strconv"
"strings"
"time"
"unicode/utf8"
+ "github.com/Fuwn/asa-news/internal/model"
+ "github.com/mmcdole/gofeed"
)
type Parser struct {
@@ -102,16 +103,72 @@ func stringPointerOrNil(value string) *string {
return &value
}
+var trackingQueryParameters = map[string]bool{
+ "utm_source": true,
+ "utm_medium": true,
+ "utm_campaign": true,
+ "utm_term": true,
+ "utm_content": true,
+ "utm_id": true,
+ "ref": true,
+ "fbclid": true,
+ "gclid": true,
+ "mc_cid": true,
+ "mc_eid": true,
+ "_hsenc": true,
+ "_hsmi": true,
+ "source": true,
+ "dest": true,
+}
+
+func normalizeGloballyUniqueIdentifier(rawIdentifier string) string {
+ normalized := strings.TrimSpace(rawIdentifier)
+
+ if !strings.HasPrefix(normalized, "http://") && !strings.HasPrefix(normalized, "https://") {
+ return normalized
+ }
+
+ parsedURL, parseError := url.Parse(normalized)
+
+ if parseError != nil {
+ return normalized
+ }
+
+ queryParameters := parsedURL.Query()
+ filteredParameters := url.Values{}
+
+ for parameterName, parameterValues := range queryParameters {
+ loweredName := strings.ToLower(parameterName)
+
+ if !trackingQueryParameters[loweredName] {
+ filteredParameters[parameterName] = parameterValues
+ }
+ }
+
+ parsedURL.RawQuery = canonicalizeQueryString(filteredParameters)
+ parsedURL.Fragment = ""
+
+ return parsedURL.String()
+}
+
+func canonicalizeQueryString(parameters url.Values) string {
+ if len(parameters) == 0 {
+ return ""
+ }
+
+ return parameters.Encode()
+}
+
func resolveGloballyUniqueIdentifier(feedItem *gofeed.Item) string {
if feedItem.GUID != "" {
- return feedItem.GUID
+ return normalizeGloballyUniqueIdentifier(feedItem.GUID)
}
if feedItem.Link != "" {
- return feedItem.Link
+ return normalizeGloballyUniqueIdentifier(feedItem.Link)
}
- hashInput := feedItem.Title + feedItem.Description
+ hashInput := strings.TrimSpace(feedItem.Title) + strings.TrimSpace(feedItem.Description)
hashBytes := sha256.Sum256([]byte(hashInput))
return fmt.Sprintf("sha256:%x", hashBytes)
diff --git a/services/worker/internal/writer/writer.go b/services/worker/internal/writer/writer.go
index 543b6e6..e5c7153 100644
--- a/services/worker/internal/writer/writer.go
+++ b/services/worker/internal/writer/writer.go
@@ -100,7 +100,7 @@ func (feedWriter *Writer) WriteEntries(writeContext context.Context, feedEntries
ELSE EXCLUDED.content_text
END,
image_url = EXCLUDED.image_url,
- published_at = EXCLUDED.published_at,
+ published_at = COALESCE(entries.published_at, EXCLUDED.published_at),
word_count = EXCLUDED.word_count,
enclosure_url = EXCLUDED.enclosure_url,
enclosure_type = EXCLUDED.enclosure_type,
diff --git a/supabase/schema.sql b/supabase/schema.sql
index 7d2fa6c..388154a 100644
--- a/supabase/schema.sql
+++ b/supabase/schema.sql
@@ -2,7 +2,7 @@
-- PostgreSQL database dump
--
--- \restrict WfBftBa2FBaIUQXqx7Cp1KF3Bwxh3O5RTj1axY30px9LdUnvAINNXEbxzkAHb3i
+-- \restrict wNSUmyvUwXt9aJkcfw8gLKUVSQuRUSZfaqWMfy8PywitG4rFxIkfWY9tkxN08cC
-- Dumped from database version 17.6
-- Dumped by pg_dump version 17.6
@@ -463,7 +463,10 @@ BEGIN
DELETE FROM public.entries
WHERE published_at < NOW() - INTERVAL '90 days'
AND id NOT IN (
- SELECT entry_id FROM public.user_entry_states WHERE saved = true
+ SELECT entry_id FROM public.user_entry_states WHERE saved = true OR read = true
+ )
+ AND feed_id NOT IN (
+ SELECT id FROM public.feeds WHERE subscriber_count > 0
);
GET DIAGNOSTICS deleted_count = ROW_COUNT;
RETURN deleted_count;
@@ -824,6 +827,11 @@ CREATE OR REPLACE FUNCTION "public"."get_unread_counts"() RETURNS TABLE("feed_id
AND ues.user_id = auth.uid()
AND ues.read = true
)
+ AND (e.owner_id IS NULL OR e.owner_id = auth.uid())
+ AND (
+ (SELECT tier FROM user_profiles WHERE id = auth.uid()) IN ('pro', 'developer')
+ OR e.published_at >= now() - interval '14 days'
+ )
GROUP BY e.feed_id;
$$;
@@ -3728,5 +3736,5 @@ ALTER DEFAULT PRIVILEGES FOR ROLE "postgres" IN SCHEMA "public" GRANT ALL ON TAB
-- PostgreSQL database dump complete
--
--- \unrestrict WfBftBa2FBaIUQXqx7Cp1KF3Bwxh3O5RTj1axY30px9LdUnvAINNXEbxzkAHb3i
+-- \unrestrict wNSUmyvUwXt9aJkcfw8gLKUVSQuRUSZfaqWMfy8PywitG4rFxIkfWY9tkxN08cC