diff options
Diffstat (limited to 'services/worker/internal/fetcher/fetcher.go')
| -rw-r--r-- | services/worker/internal/fetcher/fetcher.go | 116 |
1 files changed, 116 insertions, 0 deletions
diff --git a/services/worker/internal/fetcher/fetcher.go b/services/worker/internal/fetcher/fetcher.go new file mode 100644 index 0000000..019bd39 --- /dev/null +++ b/services/worker/internal/fetcher/fetcher.go @@ -0,0 +1,116 @@ +package fetcher + +import ( + "context" + "fmt" + "io" + "net/http" + "time" +) + +type FetchResult struct { + Body []byte + StatusCode int + EntityTag string + LastModifiedHeader string + NotModified bool +} + +type Fetcher struct { + httpClient *http.Client +} + +func NewFetcher(fetchTimeout time.Duration) *Fetcher { + return &Fetcher{ + httpClient: &http.Client{ + Timeout: fetchTimeout, + CheckRedirect: func(request *http.Request, previousRequests []*http.Request) error { + if len(previousRequests) >= 5 { + return fmt.Errorf("too many redirects (exceeded 5)") + } + + redirectValidationError := ValidateRedirectTarget(request.URL.String()) + if redirectValidationError != nil { + return fmt.Errorf("blocked redirect to reserved address: %w", redirectValidationError) + } + + return nil + }, + }, + } +} + +func (feedFetcher *Fetcher) Fetch( + requestContext context.Context, + feedURL string, + previousEntityTag string, + previousLastModified string, + authenticationConfig AuthenticationConfiguration, +) (*FetchResult, error) { + urlValidationError := ValidateFeedURL(feedURL) + if urlValidationError != nil { + return nil, fmt.Errorf("blocked request to disallowed URL: %w", urlValidationError) + } + + request, requestCreationError := http.NewRequestWithContext(requestContext, http.MethodGet, feedURL, nil) + + if requestCreationError != nil { + return nil, fmt.Errorf("failed to create HTTP request: %w", requestCreationError) + } + + request.Header.Set("User-Agent", "asa.news Feed Worker/1.0") + request.Header.Set("Accept", "application/rss+xml, application/atom+xml, application/xml, text/xml, */*") + + if previousEntityTag != "" { + request.Header.Set("If-None-Match", previousEntityTag) + } + + if previousLastModified != "" { + request.Header.Set("If-Modified-Since", previousLastModified) + } + + authenticationError := ApplyAuthentication(request, authenticationConfig) + + if authenticationError != nil { + return nil, fmt.Errorf("failed to apply authentication: %w", authenticationError) + } + + response, requestError := feedFetcher.httpClient.Do(request) + + if requestError != nil { + classifiedError := ClassifyError(requestError, 0) + + return nil, classifiedError + } + + defer response.Body.Close() + + if response.StatusCode == http.StatusNotModified { + return &FetchResult{ + StatusCode: response.StatusCode, + EntityTag: response.Header.Get("ETag"), + LastModifiedHeader: response.Header.Get("Last-Modified"), + NotModified: true, + }, nil + } + + if response.StatusCode >= 400 { + classifiedError := ClassifyError(nil, response.StatusCode) + + return nil, classifiedError + } + + responseBody, readError := io.ReadAll(io.LimitReader(response.Body, 10*1024*1024)) + + if readError != nil { + return nil, fmt.Errorf("failed to read response body: %w", readError) + } + + return &FetchResult{ + Body: responseBody, + StatusCode: response.StatusCode, + EntityTag: response.Header.Get("ETag"), + LastModifiedHeader: response.Header.Get("Last-Modified"), + NotModified: false, + }, nil +} |