summaryrefslogtreecommitdiff
path: root/services/worker/internal/fetcher/fetcher.go
diff options
context:
space:
mode:
Diffstat (limited to 'services/worker/internal/fetcher/fetcher.go')
-rw-r--r--services/worker/internal/fetcher/fetcher.go116
1 files changed, 116 insertions, 0 deletions
diff --git a/services/worker/internal/fetcher/fetcher.go b/services/worker/internal/fetcher/fetcher.go
new file mode 100644
index 0000000..019bd39
--- /dev/null
+++ b/services/worker/internal/fetcher/fetcher.go
@@ -0,0 +1,116 @@
+package fetcher
+
+import (
+ "context"
+ "fmt"
+ "io"
+ "net/http"
+ "time"
+)
+
// FetchResult describes the outcome of one feed fetch: the payload that was
// downloaded (if any) plus the status code and cache validators taken from
// the response.
type FetchResult struct {
	// Body is the response payload. It is left nil when NotModified is true.
	Body []byte
	// StatusCode is the HTTP status code of the response.
	StatusCode int
	// EntityTag is the value of the response's ETag header ("" when absent).
	EntityTag string
	// LastModifiedHeader is the raw value of the response's Last-Modified
	// header ("" when absent).
	LastModifiedHeader string
	// NotModified reports whether the server answered 304 Not Modified.
	NotModified bool
}
+
// Fetcher downloads feed documents over HTTP. All requests issued by its
// methods go through the single client stored in httpClient.
type Fetcher struct {
	// httpClient performs every request made by this Fetcher.
	httpClient *http.Client
}
+
+func NewFetcher(fetchTimeout time.Duration) *Fetcher {
+ return &Fetcher{
+ httpClient: &http.Client{
+ Timeout: fetchTimeout,
+ CheckRedirect: func(request *http.Request, previousRequests []*http.Request) error {
+ if len(previousRequests) >= 5 {
+ return fmt.Errorf("too many redirects (exceeded 5)")
+ }
+
+ redirectValidationError := ValidateRedirectTarget(request.URL.String())
+ if redirectValidationError != nil {
+ return fmt.Errorf("blocked redirect to reserved address: %w", redirectValidationError)
+ }
+
+ return nil
+ },
+ },
+ }
+}
+
+func (feedFetcher *Fetcher) Fetch(
+ requestContext context.Context,
+ feedURL string,
+ previousEntityTag string,
+ previousLastModified string,
+ authenticationConfig AuthenticationConfiguration,
+) (*FetchResult, error) {
+ urlValidationError := ValidateFeedURL(feedURL)
+ if urlValidationError != nil {
+ return nil, fmt.Errorf("blocked request to disallowed URL: %w", urlValidationError)
+ }
+
+ request, requestCreationError := http.NewRequestWithContext(requestContext, http.MethodGet, feedURL, nil)
+
+ if requestCreationError != nil {
+ return nil, fmt.Errorf("failed to create HTTP request: %w", requestCreationError)
+ }
+
+ request.Header.Set("User-Agent", "asa.news Feed Worker/1.0")
+ request.Header.Set("Accept", "application/rss+xml, application/atom+xml, application/xml, text/xml, */*")
+
+ if previousEntityTag != "" {
+ request.Header.Set("If-None-Match", previousEntityTag)
+ }
+
+ if previousLastModified != "" {
+ request.Header.Set("If-Modified-Since", previousLastModified)
+ }
+
+ authenticationError := ApplyAuthentication(request, authenticationConfig)
+
+ if authenticationError != nil {
+ return nil, fmt.Errorf("failed to apply authentication: %w", authenticationError)
+ }
+
+ response, requestError := feedFetcher.httpClient.Do(request)
+
+ if requestError != nil {
+ classifiedError := ClassifyError(requestError, 0)
+
+ return nil, classifiedError
+ }
+
+ defer response.Body.Close()
+
+ if response.StatusCode == http.StatusNotModified {
+ return &FetchResult{
+ StatusCode: response.StatusCode,
+ EntityTag: response.Header.Get("ETag"),
+ LastModifiedHeader: response.Header.Get("Last-Modified"),
+ NotModified: true,
+ }, nil
+ }
+
+ if response.StatusCode >= 400 {
+ classifiedError := ClassifyError(nil, response.StatusCode)
+
+ return nil, classifiedError
+ }
+
+ responseBody, readError := io.ReadAll(io.LimitReader(response.Body, 10*1024*1024))
+
+ if readError != nil {
+ return nil, fmt.Errorf("failed to read response body: %w", readError)
+ }
+
+ return &FetchResult{
+ Body: responseBody,
+ StatusCode: response.StatusCode,
+ EntityTag: response.Header.Get("ETag"),
+ LastModifiedHeader: response.Header.Get("Last-Modified"),
+ NotModified: false,
+ }, nil
+}