From 01b255b3f77310ea2eed85d5465aa716639e66f7 Mon Sep 17 00:00:00 2001
From: Peter Stuifzand
Date: Sun, 1 Aug 2021 21:38:40 +0200
Subject: [PATCH] Cleanup Search(...)

- Extract findFeeds
---
 cmd/eksterd/feedsearch.go | 114 ++++++++++++++++++++++++++++++++++++++
 cmd/eksterd/memory.go     |  97 ++------------------------------
 2 files changed, 119 insertions(+), 92 deletions(-)
 create mode 100644 cmd/eksterd/feedsearch.go

diff --git a/cmd/eksterd/feedsearch.go b/cmd/eksterd/feedsearch.go
new file mode 100644
index 0000000..cfcd1e7
--- /dev/null
+++ b/cmd/eksterd/feedsearch.go
@@ -0,0 +1,114 @@
+package main
+
+import (
+	"fmt"
+	"log"
+	"net/http"
+	"net/url"
+	"strings"
+
+	"p83.nl/go/ekster/pkg/fetch"
+	"p83.nl/go/ekster/pkg/microsub"
+
+	"willnorris.com/go/microformats"
+)
+
+func isSupportedFeedType(feedType string) bool {
+	return strings.HasPrefix(feedType, "text/html") ||
+		strings.HasPrefix(feedType, "application/json") ||
+		strings.HasPrefix(feedType, "application/xml") ||
+		strings.HasPrefix(feedType, "text/xml") ||
+		strings.HasPrefix(feedType, "application/rss+xml") ||
+		strings.HasPrefix(feedType, "application/atom+xml")
+}
+
+func findFeeds(cachingFetch fetch.FetcherFunc, feedURL string) ([]microsub.Feed, error) {
+	resp, err := cachingFetch(feedURL)
+	if err != nil {
+		return nil, fmt.Errorf("while fetching %s: %w", feedURL, err)
+	}
+	defer resp.Body.Close()
+
+	fetchURL, err := url.Parse(feedURL)
+	md := microformats.Parse(resp.Body, fetchURL)
+	if err != nil {
+		return nil, fmt.Errorf("while fetching %s: %w", feedURL, err)
+	}
+
+	feedResp, err := cachingFetch(fetchURL.String())
+	if err != nil {
+		return nil, fmt.Errorf("in fetch of %s: %w", fetchURL, err)
+	}
+	defer feedResp.Body.Close()
+
+	// TODO: Combine FeedHeader and FeedItems so we can use it here
+	parsedFeed, err := fetch.FeedHeader(cachingFetch, fetchURL.String(), feedResp.Header.Get("Content-Type"), feedResp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("in parse of %s: %w", fetchURL, err)
+	}
+
+	var feeds []microsub.Feed
+
+	// TODO: Only include the feed if it contains some items
+	feeds = append(feeds, parsedFeed)
+
+	// Fetch alternates
+	if alts, e := md.Rels["alternate"]; e {
+		for _, alt := range alts {
+			relURL := md.RelURLs[alt]
+			log.Printf("alternate found with type %s %#v\n", relURL.Type, relURL)
+			if isSupportedFeedType(relURL.Type) {
+				feedResp, err := cachingFetch(alt)
+				if err != nil {
+					return nil, fmt.Errorf("fetch of %s: %v", alt, err)
+				}
+
+				// FIXME: don't defer in for loop (possible memory leak)
+				defer feedResp.Body.Close()
+
+				parsedFeed, err := fetch.FeedHeader(cachingFetch, alt, feedResp.Header.Get("Content-Type"), feedResp.Body)
+				if err != nil {
+					return nil, fmt.Errorf("in parse of %s: %v", alt, err)
+				}
+
+				feeds = append(feeds, parsedFeed)
+			}
+		}
+	}
+	return feeds, nil
+}
+
+func getPossibleURLs(query string) []string {
+	urls := []string{}
+	if !(strings.HasPrefix(query, "https://") || strings.HasPrefix(query, "http://")) {
+		secureURL := "https://" + query
+		if checkURL(secureURL) {
+			urls = append(urls, secureURL)
+		} else {
+			unsecureURL := "http://" + query
+			if checkURL(unsecureURL) {
+				urls = append(urls, unsecureURL)
+			}
+		}
+	} else {
+		urls = append(urls, query)
+	}
+	return urls
+}
+
+func checkURL(u string) bool {
+	testURL, err := url.Parse(u)
+	if err != nil {
+		return false
+	}
+
+	resp, err := http.Head(testURL.String())
+
+	if err != nil {
+		log.Printf("Error while HEAD %s: %v\n", u, err)
+		return false
+	}
+
+	defer resp.Body.Close()
+
+	return resp.StatusCode == 200
+}
diff --git a/cmd/eksterd/memory.go b/cmd/eksterd/memory.go
index b104300..1dc09be 100644
--- a/cmd/eksterd/memory.go
+++ b/cmd/eksterd/memory.go
@@ -25,7 +25,6 @@ import (
 	"p83.nl/go/ekster/pkg/util"
 
 	"github.com/gomodule/redigo/redis"
-	"willnorris.com/go/microformats"
 )
 
 // DefaultPrio is the priority value for new channels
@@ -423,42 +422,6 @@ func (b *memoryBackend) UnfollowURL(uid string, url string) error {
 	return nil
 }
 
-func checkURL(u string) bool {
-	testURL, err := url.Parse(u)
-	if err != nil {
-		return false
-	}
-
-	resp, err := http.Head(testURL.String())
-
-	if err != nil {
-		log.Printf("Error while HEAD %s: %v\n", u, err)
-		return false
-	}
-
-	defer resp.Body.Close()
-
-	return resp.StatusCode == 200
-}
-
-func getPossibleURLs(query string) []string {
-	urls := []string{}
-	if !(strings.HasPrefix(query, "https://") || strings.HasPrefix(query, "http://")) {
-		secureURL := "https://" + query
-		if checkURL(secureURL) {
-			urls = append(urls, secureURL)
-		} else {
-			unsecureURL := "http://" + query
-			if checkURL(unsecureURL) {
-				urls = append(urls, unsecureURL)
-			}
-		}
-	} else {
-		urls = append(urls, query)
-	}
-	return urls
-}
-
 func (b *memoryBackend) ItemSearch(channel, query string) ([]microsub.Item, error) {
 	return querySearch(channel, query)
 }
@@ -471,63 +434,13 @@ func (b *memoryBackend) Search(query string) ([]microsub.Feed, error) {
 
 	cachingFetch := WithCaching(b.pool, Fetch2)
 
-	for _, u := range urls {
-		log.Println(u)
-		resp, err := cachingFetch(u)
+	for _, feedURL := range urls {
+		log.Println(feedURL)
+		foundFeeds, err := findFeeds(cachingFetch, feedURL)
 		if err != nil {
-			log.Printf("Error while fetching %s: %v\n", u, err)
-			continue
-		}
-		defer resp.Body.Close()
-
-		fetchURL, err := url.Parse(u)
-		md := microformats.Parse(resp.Body, fetchURL)
-		if err != nil {
-			log.Printf("Error while fetching %s: %v\n", u, err)
-			continue
-		}
-
-		feedResp, err := cachingFetch(fetchURL.String())
-		if err != nil {
-			log.Printf("Error in fetch of %s - %v\n", fetchURL, err)
-			continue
-		}
-		defer feedResp.Body.Close()
-
-		// TODO: Combine FeedHeader and FeedItems so we can use it here
-		parsedFeed, err := fetch.FeedHeader(cachingFetch, fetchURL.String(), feedResp.Header.Get("Content-Type"), feedResp.Body)
-		if err != nil {
-			log.Printf("Error in parse of %s - %v\n", fetchURL, err)
-			continue
-		}
-
-		// TODO: Only include the feed if it contains some items
-		feeds = append(feeds, parsedFeed)
-
-		if alts, e := md.Rels["alternate"]; e {
-			for _, alt := range alts {
-				relURL := md.RelURLs[alt]
-				log.Printf("alternate found with type %s %#v\n", relURL.Type, relURL)
-
-				if strings.HasPrefix(relURL.Type, "text/html") || strings.HasPrefix(relURL.Type, "application/json") || strings.HasPrefix(relURL.Type, "application/xml") || strings.HasPrefix(relURL.Type, "text/xml") || strings.HasPrefix(relURL.Type, "application/rss+xml") || strings.HasPrefix(relURL.Type, "application/atom+xml") {
-					feedResp, err := cachingFetch(alt)
-					if err != nil {
-						log.Printf("Error in fetch of %s - %v\n", alt, err)
-						continue
-					}
-					// FIXME: don't defer in for loop (possible memory leak)
-					defer feedResp.Body.Close()
-
-					parsedFeed, err := fetch.FeedHeader(cachingFetch, alt, feedResp.Header.Get("Content-Type"), feedResp.Body)
-					if err != nil {
-						log.Printf("Error in parse of %s - %v\n", alt, err)
-						continue
-					}
-
-					feeds = append(feeds, parsedFeed)
-				}
-			}
+			log.Printf("error while finding feeds: %v", err)
 		}
+		feeds = append(feeds, foundFeeds...)
 	}
 
 	return feeds, nil
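
One quick way to exercise the extracted isSupportedFeedType helper is a small table test in the same package. This is an illustrative sketch, not part of the patch; it assumes a hypothetical feedsearch_test.go living next to feedsearch.go in cmd/eksterd:

    package main

    import "testing"

    func TestIsSupportedFeedType(t *testing.T) {
    	// Content types the alternate-link scan accepts; HasPrefix also
    	// lets parameterized types like "text/html; charset=utf-8" match.
    	supported := []string{
    		"text/html; charset=utf-8",
    		"application/rss+xml",
    		"application/atom+xml",
    		"application/json",
    	}
    	for _, ct := range supported {
    		if !isSupportedFeedType(ct) {
    			t.Errorf("expected %q to be treated as a feed type", ct)
    		}
    	}
    	// Anything outside the list, such as images, should be rejected.
    	if isSupportedFeedType("image/png") {
    		t.Error("image/png should not be treated as a feed type")
    	}
    }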