package parser
import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"testing"
"time"
)
// validRSS is an RSS 2.0 fixture with two items and no enclosures.
// The first item carries every optional field the tests inspect (summary,
// HTML content, author, publication date); the second is a minimal item.
const validRSS = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/">
  <channel>
    <title>Test Feed</title>
    <link>https://example.com</link>
    <item>
      <guid>entry-1</guid>
      <title>First Entry</title>
      <link>https://example.com/1</link>
      <description>Summary of the first entry.</description>
      <content:encoded><![CDATA[<p>Full content here.</p>]]></content:encoded>
      <dc:creator>Alice</dc:creator>
      <pubDate>Mon, 01 Jan 2024 12:00:00 GMT</pubDate>
    </item>
    <item>
      <guid>entry-2</guid>
      <title>Second Entry</title>
      <link>https://example.com/2</link>
      <description>Summary of the second entry.</description>
      <pubDate>Tue, 02 Jan 2024 12:00:00 GMT</pubDate>
    </item>
  </channel>
</rss>
`
// validAtom is an Atom fixture with a single entry. The entry has an
// <updated> timestamp but no <published> element, which is what
// TestParsePublishedDateFallsBackToUpdated relies on.
const validAtom = `<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
  <title>Atom Feed</title>
  <entry>
    <id>atom-1</id>
    <title>Atom Entry</title>
    <summary>An atom summary.</summary>
    <updated>2024-01-01T12:00:00Z</updated>
    <author>
      <name>Bob</name>
    </author>
  </entry>
</feed>
`
// podcastRSS is an RSS 2.0 fixture in which every item has an audio
// enclosure. The first enclosure's url/type/length are asserted verbatim by
// TestParsePodcastFeed; the second enclosure's values are placeholders.
const podcastRSS = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <title>My Podcast</title>
    <link>https://podcast.example.com</link>
    <item>
      <guid>ep-1</guid>
      <title>Episode 1</title>
      <enclosure url="https://cdn.example.com/ep1.mp3" type="audio/mpeg" length="12345678"/>
      <pubDate>Mon, 01 Jan 2024 12:00:00 GMT</pubDate>
    </item>
    <item>
      <guid>ep-2</guid>
      <title>Episode 2</title>
      <enclosure url="https://cdn.example.com/ep2.mp3" type="audio/mpeg" length="23456789"/>
      <pubDate>Tue, 02 Jan 2024 12:00:00 GMT</pubDate>
    </item>
  </channel>
</rss>
`
// mixedEnclosureRSS is an RSS 2.0 fixture with one item that has an audio
// enclosure and one text-only item, so half of the items carry audio.
// The enclosure's url and length are placeholder values.
const mixedEnclosureRSS = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <title>Mixed Feed</title>
    <link>https://mixed.example.com</link>
    <item>
      <guid>item-audio</guid>
      <title>Audio Item</title>
      <enclosure url="https://cdn.example.com/audio.mp3" type="audio/mpeg" length="1000"/>
    </item>
    <item>
      <guid>item-text</guid>
      <title>Text Item</title>
      <description>Just a text item.</description>
    </item>
  </channel>
</rss>
`
// noGUIDRSS is an RSS 2.0 fixture whose items deliberately omit <guid>.
// The first item still has a <link>; the second has only a title and a
// description. TestParseGUIDFallback uses it to exercise GUID derivation.
const noGUIDRSS = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <title>No GUID Feed</title>
    <link>https://example.com</link>
    <item>
      <title>No GUID but has link</title>
      <link>https://example.com/no-guid</link>
    </item>
    <item>
      <title>No GUID no link</title>
      <description>Only title and description.</description>
    </item>
  </channel>
</rss>
`
// TestParseValidRSS parses the validRSS fixture without an owner and checks
// the feed-level metadata plus every populated field of the first entry.
func TestParseValidRSS(test *testing.T) {
	feedParser := NewParser()
	result, parseError := feedParser.Parse("feed-123", nil, []byte(validRSS))
	require.NoError(test, parseError)

	assert.Equal(test, "Test Feed", result.FeedTitle)
	assert.Equal(test, "https://example.com", result.SiteURL)
	// require, not assert: a wrong entry count must stop the test here
	// rather than panic on the index expression below.
	require.Len(test, result.Entries, 2)
	assert.Equal(test, 0.0, result.AudioEnclosureRatio)

	first := result.Entries[0]
	assert.Equal(test, "entry-1", first.GUID)
	assert.Equal(test, "feed-123", first.FeedIdentifier)

	// Optional fields are dereferenced below, so each nil check is fatal.
	require.NotNil(test, first.Title)
	assert.Equal(test, "First Entry", *first.Title)
	require.NotNil(test, first.URL)
	assert.Equal(test, "https://example.com/1", *first.URL)
	require.NotNil(test, first.Summary)
	assert.Equal(test, "Summary of the first entry.", *first.Summary)
	require.NotNil(test, first.ContentHTML)
	assert.Contains(test, *first.ContentHTML, "Full content here.")
	require.NotNil(test, first.PublishedAt)

	// Parse was called with a nil owner, so no owner may be attached.
	assert.Nil(test, first.OwnerIdentifier)
}
// TestParseValidAtom parses the validAtom fixture and checks the feed title
// and the GUID, title and author of its single entry.
func TestParseValidAtom(test *testing.T) {
	feedParser := NewParser()
	result, parseError := feedParser.Parse("atom-feed", nil, []byte(validAtom))
	require.NoError(test, parseError)

	assert.Equal(test, "Atom Feed", result.FeedTitle)
	// require, not assert: indexing an empty slice below would panic.
	require.Len(test, result.Entries, 1)

	entry := result.Entries[0]
	assert.Equal(test, "atom-1", entry.GUID)
	require.NotNil(test, entry.Title)
	assert.Equal(test, "Atom Entry", *entry.Title)
	require.NotNil(test, entry.Author)
	assert.Equal(test, "Bob", *entry.Author)
}
// TestParsePodcastFeed parses the podcastRSS fixture and checks that the
// audio-enclosure ratio is 1.0 and that the first episode exposes its
// enclosure URL, MIME type and length.
func TestParsePodcastFeed(test *testing.T) {
	feedParser := NewParser()
	result, parseError := feedParser.Parse("podcast-1", nil, []byte(podcastRSS))
	require.NoError(test, parseError)

	assert.Equal(test, "My Podcast", result.FeedTitle)
	// require, not assert: indexing an empty slice below would panic.
	require.Len(test, result.Entries, 2)
	assert.InDelta(test, 1.0, result.AudioEnclosureRatio, 0.01)

	firstEpisode := result.Entries[0]
	require.NotNil(test, firstEpisode.EnclosureURL)
	assert.Equal(test, "https://cdn.example.com/ep1.mp3", *firstEpisode.EnclosureURL)
	require.NotNil(test, firstEpisode.EnclosureType)
	assert.Equal(test, "audio/mpeg", *firstEpisode.EnclosureType)
	require.NotNil(test, firstEpisode.EnclosureLength)
	assert.Equal(test, int64(12345678), *firstEpisode.EnclosureLength)
}
// TestParseMixedEnclosureFeed parses the mixedEnclosureRSS fixture and
// checks that the audio-enclosure ratio is 0.5, that the first item has an
// enclosure URL, and that the second item has no enclosure fields at all.
func TestParseMixedEnclosureFeed(test *testing.T) {
	feedParser := NewParser()
	result, parseError := feedParser.Parse("mixed-1", nil, []byte(mixedEnclosureRSS))
	require.NoError(test, parseError)

	// require, not assert: both Entries[0] and Entries[1] are indexed
	// below, which would panic if fewer than two entries were returned.
	require.Len(test, result.Entries, 2)
	assert.InDelta(test, 0.5, result.AudioEnclosureRatio, 0.01)

	audioItem := result.Entries[0]
	require.NotNil(test, audioItem.EnclosureURL)

	textItem := result.Entries[1]
	assert.Nil(test, textItem.EnclosureURL)
	assert.Nil(test, textItem.EnclosureType)
	assert.Nil(test, textItem.EnclosureLength)
}
// TestParseGUIDFallback parses the noGUIDRSS fixture and checks the GUID
// assigned to each entry: the first must equal the item's link, and the
// second must be non-empty and contain the "sha256:" marker.
func TestParseGUIDFallback(test *testing.T) {
	feedParser := NewParser()
	result, parseError := feedParser.Parse("no-guid-feed", nil, []byte(noGUIDRSS))
	require.NoError(test, parseError)

	// require, not assert: both entries are indexed below, which would
	// panic if fewer than two entries were returned.
	require.Len(test, result.Entries, 2)

	entryWithLink := result.Entries[0]
	assert.Equal(test, "https://example.com/no-guid", entryWithLink.GUID)

	entryWithHash := result.Entries[1]
	assert.NotEmpty(test, entryWithHash.GUID)
	assert.Contains(test, entryWithHash.GUID, "sha256:")
}
// TestParseOwnerIdentifier passes a non-nil owner to Parse and checks that
// every returned entry carries that owner identifier.
func TestParseOwnerIdentifier(test *testing.T) {
	feedParser := NewParser()
	ownerIdentifier := "user-abc"
	result, parseError := feedParser.Parse("owned-feed", &ownerIdentifier, []byte(validRSS))
	require.NoError(test, parseError)

	// Without this guard the loop below would pass vacuously if Parse
	// returned no entries at all.
	require.NotEmpty(test, result.Entries)

	for _, entry := range result.Entries {
		require.NotNil(test, entry.OwnerIdentifier)
		assert.Equal(test, "user-abc", *entry.OwnerIdentifier)
	}
}
// TestParseInvalidXML feeds Parse a payload that is not XML and expects an
// error to be returned.
func TestParseInvalidXML(test *testing.T) {
	payload := []byte("this is not xml at all")

	subject := NewParser()
	_, failure := subject.Parse("bad-feed", nil, payload)

	assert.Error(test, failure)
}
// TestParsePublishedDateFallsBackToUpdated parses the validAtom fixture and
// checks that the entry's PublishedAt equals 2024-01-01T12:00:00Z, the
// timestamp the test name attributes to the entry's "updated" value.
func TestParsePublishedDateFallsBackToUpdated(test *testing.T) {
	feedParser := NewParser()
	result, parseError := feedParser.Parse("atom-feed", nil, []byte(validAtom))
	require.NoError(test, parseError)

	// Guard the index expression below against an empty result.
	require.NotEmpty(test, result.Entries)

	entry := result.Entries[0]
	require.NotNil(test, entry.PublishedAt)

	expectedTime := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC)
	// Equal compares instants, so the check is independent of the
	// location the parser attaches to the parsed time.
	assert.True(test, entry.PublishedAt.Equal(expectedTime),
		"PublishedAt = %v, want %v", entry.PublishedAt, expectedTime)
}
// TestParseWordCount parses the validRSS fixture and checks that the first
// entry has a word count greater than zero.
func TestParseWordCount(test *testing.T) {
	feedParser := NewParser()
	result, parseError := feedParser.Parse("feed-123", nil, []byte(validRSS))
	require.NoError(test, parseError)

	// Guard the index expression below against an empty result.
	require.NotEmpty(test, result.Entries)

	first := result.Entries[0]
	require.NotNil(test, first.WordCount)
	assert.Greater(test, *first.WordCount, 0)
}
func TestStripHTMLTags(test *testing.T) {
testCases := []struct {
name string
input string
expected string
}{
{"plain text passthrough", "hello world", "hello world"},
{"strips simple tags", "
hello
", "hello"},
{"strips nested tags", "", "hello world"},
{"handles unicode", "café résumé naïve
", "café résumé naïve"},
{"empty string", "", ""},
{"only tags", "
", ""},
}
for _, testCase := range testCases {
test.Run(testCase.name, func(test *testing.T) {
assert.Equal(test, testCase.expected, stripHTMLTags(testCase.input))
})
}
}
func TestCountWords(test *testing.T) {
emptyResult := countWords("")
assert.Nil(test, emptyResult)
twoWords := countWords("hello world")
require.NotNil(test, twoWords)
assert.Equal(test, 2, *twoWords)
withExtraSpaces := countWords(" hello world ")
require.NotNil(test, withExtraSpaces)
assert.Equal(test, 2, *withExtraSpaces)
}
func TestStringPointerOrNil(test *testing.T) {
assert.Nil(test, stringPointerOrNil(""))
result := stringPointerOrNil("hello")
require.NotNil(test, result)
assert.Equal(test, "hello", *result)
}