summaryrefslogtreecommitdiff
path: root/services/worker/internal/parser
diff options
context:
space:
mode:
authorFuwn <[email protected]>2026-02-07 01:42:57 -0800
committerFuwn <[email protected]>2026-02-07 01:42:57 -0800
commit5c5b1993edd890a80870ee05607ac5f088191d4e (patch)
treea721b76bcd49ba10826c53efc87302c7a689512f /services/worker/internal/parser
downloadasa.news-5c5b1993edd890a80870ee05607ac5f088191d4e.tar.xz
asa.news-5c5b1993edd890a80870ee05607ac5f088191d4e.zip
feat: asa.news RSS reader with developer tier, REST API, and webhooks
Full-stack RSS reader SaaS: Supabase + Next.js + Go worker. Includes three subscription tiers (free/pro/developer), API key auth, read-only REST API, webhook push notifications, Stripe billing with proration, and PWA support.
Diffstat (limited to 'services/worker/internal/parser')
-rw-r--r--services/worker/internal/parser/parser.go234
1 files changed, 234 insertions, 0 deletions
diff --git a/services/worker/internal/parser/parser.go b/services/worker/internal/parser/parser.go
new file mode 100644
index 0000000..1fb2f76
--- /dev/null
+++ b/services/worker/internal/parser/parser.go
@@ -0,0 +1,234 @@
+package parser
+
+import (
+	"crypto/sha256"
+	"fmt"
+	"html"
+	"strconv"
+	"strings"
+	"time"
+	"unicode/utf8"
+
+	"github.com/Fuwn/asa-news/internal/model"
+	"github.com/mmcdole/gofeed"
+)
+
+// Parser turns raw feed documents into normalized model.FeedEntry values.
+// It wraps a gofeed.Parser, which performs the actual feed decoding.
+type Parser struct {
+ // gofeedParser is the underlying gofeed parser that Parse delegates to.
+ gofeedParser *gofeed.Parser
+}
+
+// NewParser builds a Parser backed by a freshly constructed gofeed parser.
+func NewParser() *Parser {
+	underlying := gofeed.NewParser()
+
+	return &Parser{gofeedParser: underlying}
+}
+
+// ParseResult is the outcome of parsing a single feed document.
+type ParseResult struct {
+ // Entries holds one normalized entry per item of the parsed feed.
+ Entries []model.FeedEntry
+ // FeedTitle is the feed's title; Parse trims surrounding whitespace.
+ FeedTitle string
+ // SiteURL is the feed's link; Parse trims surrounding whitespace.
+ SiteURL string
+ // AudioEnclosureRatio is the fraction of entries that carry an audio
+ // enclosure; Parse leaves it at 0 when the feed has no entries.
+ AudioEnclosureRatio float64
+}
+
+// Parse decodes rawFeedContent as a feed and normalizes every item into a
+// model.FeedEntry tagged with feedIdentifier and ownerIdentifier.
+//
+// The returned ParseResult also carries the trimmed feed title and link, plus
+// the share of entries that have an audio enclosure (0 for an empty feed). A
+// decoding failure is returned wrapped, together with a nil result.
+func (p *Parser) Parse(feedIdentifier string, ownerIdentifier *string, rawFeedContent []byte) (*ParseResult, error) {
+	feed, err := p.gofeedParser.ParseString(string(rawFeedContent))
+	if err != nil {
+		return nil, fmt.Errorf("failed to parse feed content: %w", err)
+	}
+
+	result := &ParseResult{
+		Entries:   make([]model.FeedEntry, 0, len(feed.Items)),
+		FeedTitle: strings.TrimSpace(feed.Title),
+		SiteURL:   strings.TrimSpace(feed.Link),
+	}
+
+	withAudio := 0
+
+	for _, item := range feed.Items {
+		entry := normalizeFeedItem(feedIdentifier, ownerIdentifier, item)
+		result.Entries = append(result.Entries, entry)
+
+		// normalizeFeedItem only fills EnclosureURL for audio enclosures.
+		if entry.EnclosureURL != nil {
+			withAudio++
+		}
+	}
+
+	if total := len(result.Entries); total > 0 {
+		result.AudioEnclosureRatio = float64(withAudio) / float64(total)
+	}
+
+	return result, nil
+}
+
+// normalizeFeedItem maps a gofeed item onto a model.FeedEntry for the given
+// feed and owner. Optional text fields that resolve to the empty string are
+// stored as nil, and the word count is derived from the tag-stripped content.
+func normalizeFeedItem(feedIdentifier string, ownerIdentifier *string, feedItem *gofeed.Item) model.FeedEntry {
+	// The plain text feeds both the ContentText field and the word count.
+	plainText := resolveContentText(feedItem)
+	audioURL, audioType, audioLength := resolveAudioEnclosure(feedItem)
+
+	return model.FeedEntry{
+		FeedIdentifier:  feedIdentifier,
+		OwnerIdentifier: ownerIdentifier,
+		GUID:            resolveGloballyUniqueIdentifier(feedItem),
+		URL:             stringPointerOrNil(resolveEntryURL(feedItem)),
+		Title:           stringPointerOrNil(strings.TrimSpace(feedItem.Title)),
+		Author:          stringPointerOrNil(resolveAuthorName(feedItem)),
+		Summary:         stringPointerOrNil(strings.TrimSpace(feedItem.Description)),
+		ContentHTML:     stringPointerOrNil(resolveContentHTML(feedItem)),
+		ContentText:     stringPointerOrNil(plainText),
+		ImageURL:        stringPointerOrNil(resolveImageURL(feedItem)),
+		PublishedAt:     resolvePublishedDate(feedItem),
+		WordCount:       countWords(plainText),
+		EnclosureURL:    audioURL,
+		EnclosureType:   audioType,
+		EnclosureLength: audioLength,
+	}
+}
+
+// stringPointerOrNil returns nil for the empty string and otherwise a pointer
+// to a copy of value.
+func stringPointerOrNil(value string) *string {
+	if len(value) > 0 {
+		return &value
+	}
+
+	return nil
+}
+
+// resolveGloballyUniqueIdentifier picks an identifier for feedItem: its GUID
+// when present, otherwise its link, and as a last resort the string "sha256:"
+// followed by the hex SHA-256 digest of the title concatenated with the
+// description.
+func resolveGloballyUniqueIdentifier(feedItem *gofeed.Item) string {
+	switch {
+	case feedItem.GUID != "":
+		return feedItem.GUID
+	case feedItem.Link != "":
+		return feedItem.Link
+	}
+
+	digest := sha256.Sum256([]byte(feedItem.Title + feedItem.Description))
+
+	return fmt.Sprintf("sha256:%x", digest)
+}
+
+// resolveEntryURL returns the URL for feedItem: its link when set, otherwise
+// its GUID when that GUID starts with an http:// or https:// scheme, and ""
+// when neither applies.
+func resolveEntryURL(feedItem *gofeed.Item) string {
+	if feedItem.Link != "" {
+		return feedItem.Link
+	}
+
+	// Require the full scheme separator so that opaque identifiers which
+	// merely begin with "http" (for example "httpd-2.4-released") are not
+	// mistaken for links.
+	guid := feedItem.GUID
+	if strings.HasPrefix(guid, "http://") || strings.HasPrefix(guid, "https://") {
+		return guid
+	}
+
+	return ""
+}
+
+// resolveContentHTML returns the item's content, falling back to its
+// description when the content is empty.
+func resolveContentHTML(feedItem *gofeed.Item) string {
+	if body := feedItem.Content; body != "" {
+		return body
+	}
+
+	return feedItem.Description
+}
+
+// resolveContentText returns the tag-stripped text of the item's content, or
+// of its description when the content is empty.
+func resolveContentText(feedItem *gofeed.Item) string {
+	if feedItem.Content != "" {
+		return stripHTMLTags(feedItem.Content)
+	}
+
+	return stripHTMLTags(feedItem.Description)
+}
+
+// resolveAuthorName returns the name of the item's Author when it is set and
+// non-empty, otherwise the name of the first entry in Authors, otherwise "".
+func resolveAuthorName(feedItem *gofeed.Item) string {
+	if primary := feedItem.Author; primary != nil && primary.Name != "" {
+		return primary.Name
+	}
+
+	// An empty name on the first listed author yields "" either way.
+	if authors := feedItem.Authors; len(authors) > 0 {
+		return authors[0].Name
+	}
+
+	return ""
+}
+
+// resolvePublishedDate returns the item's parsed publication time, falling
+// back to its parsed update time; the result is nil when neither is set.
+func resolvePublishedDate(feedItem *gofeed.Item) *time.Time {
+	if published := feedItem.PublishedParsed; published != nil {
+		return published
+	}
+
+	// UpdatedParsed may itself be nil, in which case no date is known.
+	return feedItem.UpdatedParsed
+}
+
+// resolveAudioEnclosure returns the URL, MIME type and byte length of the
+// first enclosure whose type starts with "audio/" and whose URL is non-empty.
+// The length is nil when it is missing or not a base-10 integer. All three
+// results are nil when the item has no such enclosure.
+func resolveAudioEnclosure(feedItem *gofeed.Item) (*string, *string, *int64) {
+	// Ranging over a nil slice is a no-op, so no explicit nil guard is needed.
+	for _, candidate := range feedItem.Enclosures {
+		if candidate.URL == "" || !strings.HasPrefix(candidate.Type, "audio/") {
+			continue
+		}
+
+		audioURL, audioType := candidate.URL, candidate.Type
+
+		// An empty length fails to parse as well, so it also yields nil.
+		size, err := strconv.ParseInt(candidate.Length, 10, 64)
+		if err != nil {
+			return &audioURL, &audioType, nil
+		}
+
+		return &audioURL, &audioType, &size
+	}
+
+	return nil, nil, nil
+}
+
+// resolveImageURL returns the URL of the item's image, or "" when the item
+// has no image.
+func resolveImageURL(feedItem *gofeed.Item) string {
+	if image := feedItem.Image; image != nil {
+		return image.URL
+	}
+
+	return ""
+}
+
+// countWords returns the number of whitespace-separated words in plainText,
+// or nil when plainText is empty. Non-empty input made up only of whitespace
+// yields a pointer to 0.
+func countWords(plainText string) *int {
+	if len(plainText) == 0 {
+		return nil
+	}
+
+	words := len(strings.Fields(plainText))
+
+	return &words
+}
+
+// stripHTMLTags converts an HTML fragment to plain text. Everything from a
+// '<' up to and including the next '>' is removed, HTML character references
+// such as "&amp;" are decoded, and surrounding whitespace is trimmed.
+//
+// A '>' that appears outside any tag is kept as literal text. This is a
+// lightweight scanner rather than an HTML parser: an unescaped '<' in running
+// text still opens a "tag" that swallows input up to the next '>'.
+func stripHTMLTags(htmlContent string) string {
+	var resultBuilder strings.Builder
+
+	// Pre-size for the common case where most of the input is text.
+	resultBuilder.Grow(len(htmlContent))
+
+	insideTag := false
+
+	for offset := 0; offset < len(htmlContent); {
+		currentRune, runeSize := utf8.DecodeRuneInString(htmlContent[offset:])
+		offset += runeSize
+
+		switch {
+		case currentRune == '<':
+			insideTag = true
+		case currentRune == '>' && insideTag:
+			insideTag = false
+		case !insideTag:
+			// Includes a stray '>' that does not close any tag.
+			resultBuilder.WriteRune(currentRune)
+		}
+	}
+
+	// Decode entities only after the tags are gone so that an escaped
+	// "&lt;b&gt;" comes out as the literal text "<b>" instead of being
+	// stripped, and trim last so a decoded leading "&nbsp;" is removed too.
+	return strings.TrimSpace(html.UnescapeString(resultBuilder.String()))
+}