diff options
| author | Fuwn <[email protected]> | 2026-02-07 01:42:57 -0800 |
|---|---|---|
| committer | Fuwn <[email protected]> | 2026-02-07 01:42:57 -0800 |
| commit | 5c5b1993edd890a80870ee05607ac5f088191d4e (patch) | |
| tree | a721b76bcd49ba10826c53efc87302c7a689512f /services/worker/internal/parser | |
| download | asa.news-5c5b1993edd890a80870ee05607ac5f088191d4e.tar.xz asa.news-5c5b1993edd890a80870ee05607ac5f088191d4e.zip | |
feat: asa.news RSS reader with developer tier, REST API, and webhooks
Full-stack RSS reader SaaS: Supabase + Next.js + Go worker.
Includes three subscription tiers (free/pro/developer), API key auth,
read-only REST API, webhook push notifications, Stripe billing with
proration, and PWA support.
Diffstat (limited to 'services/worker/internal/parser')
| -rw-r--r-- | services/worker/internal/parser/parser.go | 234 |
1 files changed, 234 insertions, 0 deletions
diff --git a/services/worker/internal/parser/parser.go b/services/worker/internal/parser/parser.go new file mode 100644 index 0000000..1fb2f76 --- /dev/null +++ b/services/worker/internal/parser/parser.go @@ -0,0 +1,234 @@ +package parser + +import ( + "crypto/sha256" + "fmt" + "github.com/Fuwn/asa-news/internal/model" + "github.com/mmcdole/gofeed" + "strconv" + "strings" + "time" + "unicode/utf8" +) + +type Parser struct { + gofeedParser *gofeed.Parser +} + +func NewParser() *Parser { + return &Parser{ + gofeedParser: gofeed.NewParser(), + } +} + +type ParseResult struct { + Entries []model.FeedEntry + FeedTitle string + SiteURL string + AudioEnclosureRatio float64 +} + +func (feedParser *Parser) Parse(feedIdentifier string, ownerIdentifier *string, rawFeedContent []byte) (*ParseResult, error) { + parsedFeed, parseError := feedParser.gofeedParser.ParseString(string(rawFeedContent)) + + if parseError != nil { + return nil, fmt.Errorf("failed to parse feed content: %w", parseError) + } + + feedEntries := make([]model.FeedEntry, 0, len(parsedFeed.Items)) + + audioEnclosureCount := 0 + + for _, feedItem := range parsedFeed.Items { + normalizedEntry := normalizeFeedItem(feedIdentifier, ownerIdentifier, feedItem) + if normalizedEntry.EnclosureURL != nil { + audioEnclosureCount++ + } + feedEntries = append(feedEntries, normalizedEntry) + } + + audioEnclosureRatio := 0.0 + if len(feedEntries) > 0 { + audioEnclosureRatio = float64(audioEnclosureCount) / float64(len(feedEntries)) + } + + return &ParseResult{ + Entries: feedEntries, + FeedTitle: strings.TrimSpace(parsedFeed.Title), + SiteURL: strings.TrimSpace(parsedFeed.Link), + AudioEnclosureRatio: audioEnclosureRatio, + }, nil +} + +func normalizeFeedItem(feedIdentifier string, ownerIdentifier *string, feedItem *gofeed.Item) model.FeedEntry { + globallyUniqueIdentifier := resolveGloballyUniqueIdentifier(feedItem) + entryURL := stringPointerOrNil(resolveEntryURL(feedItem)) + entryTitle := stringPointerOrNil(strings.TrimSpace(feedItem.Title)) + entrySummary := stringPointerOrNil(strings.TrimSpace(feedItem.Description)) + entryContentHTML := stringPointerOrNil(resolveContentHTML(feedItem)) + entryContentText := resolveContentText(feedItem) + authorName := stringPointerOrNil(resolveAuthorName(feedItem)) + publishedAt := resolvePublishedDate(feedItem) + entryImageURL := stringPointerOrNil(resolveImageURL(feedItem)) + wordCount := countWords(entryContentText) + enclosureURL, enclosureType, enclosureLength := resolveAudioEnclosure(feedItem) + + return model.FeedEntry{ + FeedIdentifier: feedIdentifier, + OwnerIdentifier: ownerIdentifier, + GUID: globallyUniqueIdentifier, + URL: entryURL, + Title: entryTitle, + Author: authorName, + Summary: entrySummary, + ContentHTML: entryContentHTML, + ContentText: stringPointerOrNil(entryContentText), + ImageURL: entryImageURL, + PublishedAt: publishedAt, + WordCount: wordCount, + EnclosureURL: enclosureURL, + EnclosureType: enclosureType, + EnclosureLength: enclosureLength, + } +} + +func stringPointerOrNil(value string) *string { + if value == "" { + return nil + } + + return &value +} + +func resolveGloballyUniqueIdentifier(feedItem *gofeed.Item) string { + if feedItem.GUID != "" { + return feedItem.GUID + } + + if feedItem.Link != "" { + return feedItem.Link + } + + hashInput := feedItem.Title + feedItem.Description + hashBytes := sha256.Sum256([]byte(hashInput)) + + return fmt.Sprintf("sha256:%x", hashBytes) +} + +func resolveEntryURL(feedItem *gofeed.Item) string { + if feedItem.Link != "" { + return feedItem.Link + } + + if feedItem.GUID != "" && strings.HasPrefix(feedItem.GUID, "http") { + return feedItem.GUID + } + + return "" +} + +func resolveContentHTML(feedItem *gofeed.Item) string { + if feedItem.Content != "" { + return feedItem.Content + } + + return feedItem.Description +} + +func resolveContentText(feedItem *gofeed.Item) string { + contentSource := feedItem.Content + + if contentSource == "" { + contentSource = feedItem.Description + } + + return stripHTMLTags(contentSource) +} + +func resolveAuthorName(feedItem *gofeed.Item) string { + if feedItem.Author != nil && feedItem.Author.Name != "" { + return feedItem.Author.Name + } + + if len(feedItem.Authors) > 0 && feedItem.Authors[0].Name != "" { + return feedItem.Authors[0].Name + } + + return "" +} + +func resolvePublishedDate(feedItem *gofeed.Item) *time.Time { + if feedItem.PublishedParsed != nil { + return feedItem.PublishedParsed + } + + if feedItem.UpdatedParsed != nil { + return feedItem.UpdatedParsed + } + + return nil +} + +func resolveAudioEnclosure(feedItem *gofeed.Item) (*string, *string, *int64) { + if feedItem.Enclosures == nil { + return nil, nil, nil + } + + for _, enclosure := range feedItem.Enclosures { + if strings.HasPrefix(enclosure.Type, "audio/") && enclosure.URL != "" { + enclosureURL := enclosure.URL + enclosureType := enclosure.Type + + var enclosureLength *int64 + if enclosure.Length != "" { + if parsedLength, parseError := strconv.ParseInt(enclosure.Length, 10, 64); parseError == nil { + enclosureLength = &parsedLength + } + } + + return &enclosureURL, &enclosureType, enclosureLength + } + } + + return nil, nil, nil +} + +func resolveImageURL(feedItem *gofeed.Item) string { + if feedItem.Image != nil && feedItem.Image.URL != "" { + return feedItem.Image.URL + } + + return "" +} + +func countWords(plainText string) *int { + if plainText == "" { + return nil + } + + count := len(strings.Fields(plainText)) + + return &count +} + +func stripHTMLTags(htmlContent string) string { + var resultBuilder strings.Builder + + insideTag := false + + for characterIndex := 0; characterIndex < len(htmlContent); { + currentRune, runeSize := utf8.DecodeRuneInString(htmlContent[characterIndex:]) + + if currentRune == '<' { + insideTag = true + } else if currentRune == '>' { + insideTag = false + } else if !insideTag { + resultBuilder.WriteRune(currentRune) + } + + characterIndex += runeSize + } + + return strings.TrimSpace(resultBuilder.String()) +} |