package parser

import (
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// Feed fixtures shared by the parser tests.
//
// NOTE(review): the markup in these fixtures (and in the TestStripHTMLTags
// inputs) was reconstructed from the assertions that consume them, because the
// literals in the reviewed copy had lost their tags. Values that no assertion
// pins (for example the second episode's enclosure and the mixed feed's audio
// enclosure) are placeholders; confirm against the canonical fixtures.
const (
	// validRSS is an RSS 2.0 feed with two text-only items; only the first
	// carries full HTML content and an author.
	validRSS = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
  <channel>
    <title>Test Feed</title>
    <link>https://example.com</link>
    <item>
      <guid>entry-1</guid>
      <title>First Entry</title>
      <link>https://example.com/1</link>
      <description>Summary of the first entry.</description>
      <content:encoded><![CDATA[<p>Full content here.</p>]]></content:encoded>
      <author>Alice</author>
      <pubDate>Mon, 01 Jan 2024 12:00:00 GMT</pubDate>
    </item>
    <item>
      <guid>entry-2</guid>
      <title>Second Entry</title>
      <link>https://example.com/2</link>
      <description>Summary of the second entry.</description>
      <pubDate>Tue, 02 Jan 2024 12:00:00 GMT</pubDate>
    </item>
  </channel>
</rss>
`

	// validAtom is an Atom feed with a single entry that has an <updated>
	// timestamp but no <published> element.
	validAtom = `<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
  <title>Atom Feed</title>
  <entry>
    <id>atom-1</id>
    <title>Atom Entry</title>
    <summary>An atom summary.</summary>
    <updated>2024-01-01T12:00:00Z</updated>
    <author>
      <name>Bob</name>
    </author>
  </entry>
</feed>
`

	// podcastRSS is an RSS feed in which every item has an audio enclosure.
	podcastRSS = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <title>My Podcast</title>
    <link>https://podcast.example.com</link>
    <item>
      <guid>ep-1</guid>
      <title>Episode 1</title>
      <enclosure url="https://cdn.example.com/ep1.mp3" type="audio/mpeg" length="12345678"/>
      <pubDate>Mon, 01 Jan 2024 12:00:00 GMT</pubDate>
    </item>
    <item>
      <guid>ep-2</guid>
      <title>Episode 2</title>
      <enclosure url="https://cdn.example.com/ep2.mp3" type="audio/mpeg" length="23456789"/>
      <pubDate>Tue, 02 Jan 2024 12:00:00 GMT</pubDate>
    </item>
  </channel>
</rss>
`

	// mixedEnclosureRSS has one item with an audio enclosure and one without.
	mixedEnclosureRSS = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <title>Mixed Feed</title>
    <link>https://mixed.example.com</link>
    <item>
      <guid>item-audio</guid>
      <title>Audio Item</title>
      <enclosure url="https://cdn.example.com/audio.mp3" type="audio/mpeg" length="1000"/>
    </item>
    <item>
      <guid>item-text</guid>
      <title>Text Item</title>
      <description>Just a text item.</description>
    </item>
  </channel>
</rss>
`

	// noGUIDRSS has two items without a <guid>: the first has a link, the
	// second has only a title and a description.
	noGUIDRSS = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <title>No GUID Feed</title>
    <link>https://example.com</link>
    <item>
      <title>No GUID but has link</title>
      <link>https://example.com/no-guid</link>
    </item>
    <item>
      <title>No GUID no link</title>
      <description>Only title and description.</description>
    </item>
  </channel>
</rss>
`
)

// TestParseValidRSS checks the feed-level fields and every populated field of
// the first entry of a plain RSS feed parsed without an owner.
func TestParseValidRSS(t *testing.T) {
	feedParser := NewParser()

	result, err := feedParser.Parse("feed-123", nil, []byte(validRSS))
	require.NoError(t, err)

	assert.Equal(t, "Test Feed", result.FeedTitle)
	assert.Equal(t, "https://example.com", result.SiteURL)
	// require, not assert: the index expression below would panic otherwise.
	require.Len(t, result.Entries, 2)
	assert.InDelta(t, 0.0, result.AudioEnclosureRatio, 0.01)

	first := result.Entries[0]
	assert.Equal(t, "entry-1", first.GUID)
	assert.Equal(t, "feed-123", first.FeedIdentifier)

	require.NotNil(t, first.Title)
	assert.Equal(t, "First Entry", *first.Title)

	require.NotNil(t, first.URL)
	assert.Equal(t, "https://example.com/1", *first.URL)

	require.NotNil(t, first.Summary)
	assert.Equal(t, "Summary of the first entry.", *first.Summary)

	require.NotNil(t, first.ContentHTML)
	assert.Contains(t, *first.ContentHTML, "Full content here.")

	require.NotNil(t, first.PublishedAt)
	assert.Nil(t, first.OwnerIdentifier)
}

// TestParseValidAtom checks that an Atom feed yields its title and a single
// entry with the expected GUID, title and author.
func TestParseValidAtom(t *testing.T) {
	feedParser := NewParser()

	result, err := feedParser.Parse("atom-feed", nil, []byte(validAtom))
	require.NoError(t, err)

	assert.Equal(t, "Atom Feed", result.FeedTitle)
	require.Len(t, result.Entries, 1)

	entry := result.Entries[0]
	assert.Equal(t, "atom-1", entry.GUID)

	require.NotNil(t, entry.Title)
	assert.Equal(t, "Atom Entry", *entry.Title)

	require.NotNil(t, entry.Author)
	assert.Equal(t, "Bob", *entry.Author)
}

// TestParsePodcastFeed checks that a feed whose items all carry audio
// enclosures reports a ratio of 1 and exposes the enclosure URL, type and
// length of the first episode.
func TestParsePodcastFeed(t *testing.T) {
	feedParser := NewParser()

	result, err := feedParser.Parse("podcast-1", nil, []byte(podcastRSS))
	require.NoError(t, err)

	assert.Equal(t, "My Podcast", result.FeedTitle)
	require.Len(t, result.Entries, 2)
	assert.InDelta(t, 1.0, result.AudioEnclosureRatio, 0.01)

	firstEpisode := result.Entries[0]

	require.NotNil(t, firstEpisode.EnclosureURL)
	assert.Equal(t, "https://cdn.example.com/ep1.mp3", *firstEpisode.EnclosureURL)

	require.NotNil(t, firstEpisode.EnclosureType)
	assert.Equal(t, "audio/mpeg", *firstEpisode.EnclosureType)

	require.NotNil(t, firstEpisode.EnclosureLength)
	assert.Equal(t, int64(12345678), *firstEpisode.EnclosureLength)
}

// TestParseMixedEnclosureFeed checks that a feed with one audio item out of
// two reports a ratio of 0.5 and leaves the enclosure fields of the text item
// nil.
func TestParseMixedEnclosureFeed(t *testing.T) {
	feedParser := NewParser()

	result, err := feedParser.Parse("mixed-1", nil, []byte(mixedEnclosureRSS))
	require.NoError(t, err)

	require.Len(t, result.Entries, 2)
	assert.InDelta(t, 0.5, result.AudioEnclosureRatio, 0.01)

	audioItem := result.Entries[0]
	require.NotNil(t, audioItem.EnclosureURL)

	textItem := result.Entries[1]
	assert.Nil(t, textItem.EnclosureURL)
	assert.Nil(t, textItem.EnclosureType)
	assert.Nil(t, textItem.EnclosureLength)
}

// TestParseGUIDFallback checks the GUID chosen for items without a <guid>:
// the link when one exists, otherwise a non-empty value containing "sha256:".
func TestParseGUIDFallback(t *testing.T) {
	feedParser := NewParser()

	result, err := feedParser.Parse("no-guid-feed", nil, []byte(noGUIDRSS))
	require.NoError(t, err)
	require.Len(t, result.Entries, 2)

	entryWithLink := result.Entries[0]
	assert.Equal(t, "https://example.com/no-guid", entryWithLink.GUID)

	entryWithHash := result.Entries[1]
	assert.NotEmpty(t, entryWithHash.GUID)
	assert.Contains(t, entryWithHash.GUID, "sha256:")
}

// TestParseOwnerIdentifier checks that a non-nil owner passed to Parse is set
// on every returned entry.
func TestParseOwnerIdentifier(t *testing.T) {
	feedParser := NewParser()
	ownerIdentifier := "user-abc"

	result, err := feedParser.Parse("owned-feed", &ownerIdentifier, []byte(validRSS))
	require.NoError(t, err)
	// Without this guard the loop below would pass vacuously on zero entries.
	require.NotEmpty(t, result.Entries)

	for _, entry := range result.Entries {
		require.NotNil(t, entry.OwnerIdentifier)
		assert.Equal(t, "user-abc", *entry.OwnerIdentifier)
	}
}

// TestParseInvalidXML checks that Parse returns an error for input that is
// not XML.
func TestParseInvalidXML(t *testing.T) {
	feedParser := NewParser()

	_, err := feedParser.Parse("bad-feed", nil, []byte("this is not xml at all"))
	assert.Error(t, err)
}

// TestParsePublishedDateFallsBackToUpdated checks that the Atom entry's
// PublishedAt equals 2024-01-01T12:00:00Z.
func TestParsePublishedDateFallsBackToUpdated(t *testing.T) {
	feedParser := NewParser()

	result, err := feedParser.Parse("atom-feed", nil, []byte(validAtom))
	require.NoError(t, err)
	require.NotEmpty(t, result.Entries)

	entry := result.Entries[0]
	require.NotNil(t, entry.PublishedAt)

	expectedTime := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC)
	assert.True(t, entry.PublishedAt.Equal(expectedTime),
		"PublishedAt = %v, want %v", entry.PublishedAt, expectedTime)
}

// TestParseWordCount checks that the first entry of the RSS fixture has a
// non-nil, positive word count.
func TestParseWordCount(t *testing.T) {
	feedParser := NewParser()

	result, err := feedParser.Parse("feed-123", nil, []byte(validRSS))
	require.NoError(t, err)
	require.NotEmpty(t, result.Entries)

	first := result.Entries[0]
	require.NotNil(t, first.WordCount)
	assert.Greater(t, *first.WordCount, 0)
}

// TestStripHTMLTags runs stripHTMLTags over a table of inputs and compares the
// result with the expected plain text.
func TestStripHTMLTags(t *testing.T) {
	testCases := []struct {
		name     string
		input    string
		expected string
	}{
		{name: "plain text passthrough", input: "hello world", expected: "hello world"},
		{name: "strips simple tags", input: "<p>hello</p>", expected: "hello"},
		{name: "strips nested tags", input: "<div><p>hello world</p></div>", expected: "hello world"},
		{name: "handles unicode", input: "<p>café résumé naïve</p>", expected: "café résumé naïve"},
		{name: "empty string", input: "", expected: ""},
		{name: "only tags", input: "<div><p></p></div>", expected: ""},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			assert.Equal(t, tc.expected, stripHTMLTags(tc.input))
		})
	}
}

// TestCountWords checks that countWords returns nil for the empty string and
// a pointer to 2 for two words, with or without surrounding spaces.
func TestCountWords(t *testing.T) {
	emptyResult := countWords("")
	assert.Nil(t, emptyResult)

	twoWords := countWords("hello world")
	require.NotNil(t, twoWords)
	assert.Equal(t, 2, *twoWords)

	withExtraSpaces := countWords(" hello world ")
	require.NotNil(t, withExtraSpaces)
	assert.Equal(t, 2, *withExtraSpaces)
}

// TestStringPointerOrNil checks that stringPointerOrNil returns nil for the
// empty string and a pointer to the input otherwise.
func TestStringPointerOrNil(t *testing.T) {
	assert.Nil(t, stringPointerOrNil(""))

	result := stringPointerOrNil("hello")
	require.NotNil(t, result)
	assert.Equal(t, "hello", *result)
}