Cleanup Search(...)

- Extract findFeeds
This commit is contained in:
Peter Stuifzand 2021-08-01 21:38:40 +02:00
parent 25bdf5a4a2
commit 01b255b3f7
Signed by: peter
GPG Key ID: 374322D56E5209E8
2 changed files with 119 additions and 92 deletions

114
cmd/eksterd/feedsearch.go Normal file
View File

@ -0,0 +1,114 @@
package main
import (
	"fmt"
	"log"
	"net/http"
	"net/url"
	"strings"
	"time"

	"p83.nl/go/ekster/pkg/fetch"
	"p83.nl/go/ekster/pkg/microsub"
	"willnorris.com/go/microformats"
)
// isSupportedFeedType reports whether feedType starts with one of the media
// types accepted as a feed candidate: HTML, JSON, generic XML, RSS or Atom.
//
// The comparison is a case-sensitive prefix match, so values that carry
// parameters (for example "text/html; charset=utf-8") are accepted as well.
func isSupportedFeedType(feedType string) bool {
	supportedPrefixes := [...]string{
		"text/html",
		"application/json",
		"application/xml",
		"text/xml",
		"application/rss+xml",
		"application/atom+xml",
	}

	for _, prefix := range supportedPrefixes {
		if strings.HasPrefix(feedType, prefix) {
			return true
		}
	}
	return false
}
// findFeeds fetches feedURL and returns the feed found at that URL, followed
// by the feeds the page advertises through rel="alternate" links whose type
// passes isSupportedFeedType.
//
// The page is requested twice through cachingFetch: the first response body
// is handed to the microformats parser, the second one to fetch.FeedHeader.
// Any failure (invalid URL, fetch error or parse error, including those for
// an alternate) aborts the search and is returned wrapped together with the
// URL it concerns; no partial result is returned in that case.
func findFeeds(cachingFetch fetch.FetcherFunc, feedURL string) ([]microsub.Feed, error) {
	// Check the URL before it is used as the base URL for the microformats
	// parser and before fetchURL.String() is called.
	fetchURL, err := url.Parse(feedURL)
	if err != nil {
		return nil, fmt.Errorf("while parsing url %s: %w", feedURL, err)
	}

	resp, err := cachingFetch(feedURL)
	if err != nil {
		return nil, fmt.Errorf("while fetching %s: %w", feedURL, err)
	}
	defer resp.Body.Close()

	md := microformats.Parse(resp.Body, fetchURL)

	// TODO: Combine FeedHeader and FeedItems so we can use it here
	parsedFeed, err := fetchFeedHeader(cachingFetch, fetchURL.String())
	if err != nil {
		return nil, err
	}

	var feeds []microsub.Feed

	// TODO: Only include the feed if it contains some items
	feeds = append(feeds, parsedFeed)

	// Fetch alternates. Ranging over a missing "alternate" entry is a no-op.
	for _, alt := range md.Rels["alternate"] {
		relURL, ok := md.RelURLs[alt]
		if !ok {
			// Without rel-url details the type is unknown, so the
			// alternate cannot be classified as a feed.
			continue
		}
		log.Printf("alternate found with type %s %#v\n", relURL.Type, relURL)

		if !isSupportedFeedType(relURL.Type) {
			continue
		}

		// The helper closes each response body as soon as it has been
		// parsed instead of keeping all of them open until findFeeds returns.
		altFeed, err := fetchFeedHeader(cachingFetch, alt)
		if err != nil {
			return nil, err
		}
		feeds = append(feeds, altFeed)
	}

	return feeds, nil
}

// fetchFeedHeader fetches feedURL with cachingFetch and passes the response,
// together with its Content-Type header, to fetch.FeedHeader. The response
// body is closed before the function returns.
func fetchFeedHeader(cachingFetch fetch.FetcherFunc, feedURL string) (microsub.Feed, error) {
	resp, err := cachingFetch(feedURL)
	if err != nil {
		return microsub.Feed{}, fmt.Errorf("in fetch of %s: %w", feedURL, err)
	}
	defer resp.Body.Close()

	feed, err := fetch.FeedHeader(cachingFetch, feedURL, resp.Header.Get("Content-Type"), resp.Body)
	if err != nil {
		return microsub.Feed{}, fmt.Errorf("in parse of %s: %w", feedURL, err)
	}
	return feed, nil
}
// getPossibleURLs turns a search query into the list of URLs to try.
//
// A query that already starts with "https://" or "http://" is returned
// unchanged as the only candidate, without being probed. Any other query is
// treated as scheme-less: the "https://" form is probed with checkURL first,
// and the "http://" form only when that probe fails. If neither probe
// succeeds the result is an empty, non-nil slice.
func getPossibleURLs(query string) []string {
	candidates := []string{}

	hasScheme := strings.HasPrefix(query, "https://") || strings.HasPrefix(query, "http://")
	if hasScheme {
		return append(candidates, query)
	}

	// Prefer the secure scheme; stop at the first one that passes the probe.
	for _, scheme := range [...]string{"https://", "http://"} {
		if candidate := scheme + query; checkURL(candidate) {
			return append(candidates, candidate)
		}
	}
	return candidates
}
// checkURLTimeout bounds how long checkURL waits for a single HEAD request,
// including connecting, redirects and reading the response headers.
const checkURLTimeout = 10 * time.Second

// checkURLClient is the HTTP client used by checkURL. Unlike
// http.DefaultClient it has a timeout, so an unresponsive host cannot block
// the caller indefinitely.
var checkURLClient = &http.Client{Timeout: checkURLTimeout}

// checkURL reports whether a HEAD request to u is answered with status
// 200 OK.
//
// It returns false when u cannot be parsed as a URL, when the request fails
// (including when it exceeds checkURLTimeout), or when the final response has
// any status other than 200. Request failures are logged.
func checkURL(u string) bool {
	testURL, err := url.Parse(u)
	if err != nil {
		return false
	}

	resp, err := checkURLClient.Head(testURL.String())
	if err != nil {
		log.Printf("Error while HEAD %s: %v\n", u, err)
		return false
	}
	defer resp.Body.Close()

	return resp.StatusCode == http.StatusOK
}

View File

@ -25,7 +25,6 @@ import (
"p83.nl/go/ekster/pkg/util"
"github.com/gomodule/redigo/redis"
"willnorris.com/go/microformats"
)
// DefaultPrio is the priority value for new channels
@ -423,42 +422,6 @@ func (b *memoryBackend) UnfollowURL(uid string, url string) error {
return nil
}
// checkURL reports whether a HEAD request to u is answered with status 200.
// It returns false when u cannot be parsed as a URL or when the request
// fails; request failures are logged.
func checkURL(u string) bool {
testURL, err := url.Parse(u)
if err != nil {
return false
}
// http.Head issues the request through the package's default client.
resp, err := http.Head(testURL.String())
if err != nil {
log.Printf("Error while HEAD %s: %v\n", u, err)
return false
}
defer resp.Body.Close()
// Only an exact 200 counts as success.
return resp.StatusCode == 200
}
// getPossibleURLs turns a search query into the list of URLs to try.
// A query that already starts with "https://" or "http://" is returned as
// the only entry without being probed. Otherwise the "https://" form is
// probed with checkURL first and the "http://" form only if that fails; when
// neither probe succeeds the returned slice is empty (but not nil).
func getPossibleURLs(query string) []string {
urls := []string{}
if !(strings.HasPrefix(query, "https://") || strings.HasPrefix(query, "http://")) {
// No scheme given: prefer the secure variant.
secureURL := "https://" + query
if checkURL(secureURL) {
urls = append(urls, secureURL)
} else {
// Fall back to plain HTTP only when HTTPS did not pass the probe.
unsecureURL := "http://" + query
if checkURL(unsecureURL) {
urls = append(urls, unsecureURL)
}
}
} else {
// The query already carries a scheme; use it unchanged.
urls = append(urls, query)
}
return urls
}
// ItemSearch returns the items that querySearch finds for query in channel.
// It delegates directly to querySearch and does not use the backend's state.
func (b *memoryBackend) ItemSearch(channel, query string) ([]microsub.Item, error) {
return querySearch(channel, query)
}
@ -471,63 +434,13 @@ func (b *memoryBackend) Search(query string) ([]microsub.Feed, error) {
cachingFetch := WithCaching(b.pool, Fetch2)
for _, u := range urls {
log.Println(u)
resp, err := cachingFetch(u)
for _, feedURL := range urls {
log.Println(feedURL)
foundFeeds, err := findFeeds(cachingFetch, feedURL)
if err != nil {
log.Printf("Error while fetching %s: %v\n", u, err)
continue
}
defer resp.Body.Close()
fetchURL, err := url.Parse(u)
md := microformats.Parse(resp.Body, fetchURL)
if err != nil {
log.Printf("Error while fetching %s: %v\n", u, err)
continue
}
feedResp, err := cachingFetch(fetchURL.String())
if err != nil {
log.Printf("Error in fetch of %s - %v\n", fetchURL, err)
continue
}
defer feedResp.Body.Close()
// TODO: Combine FeedHeader and FeedItems so we can use it here
parsedFeed, err := fetch.FeedHeader(cachingFetch, fetchURL.String(), feedResp.Header.Get("Content-Type"), feedResp.Body)
if err != nil {
log.Printf("Error in parse of %s - %v\n", fetchURL, err)
continue
}
// TODO: Only include the feed if it contains some items
feeds = append(feeds, parsedFeed)
if alts, e := md.Rels["alternate"]; e {
for _, alt := range alts {
relURL := md.RelURLs[alt]
log.Printf("alternate found with type %s %#v\n", relURL.Type, relURL)
if strings.HasPrefix(relURL.Type, "text/html") || strings.HasPrefix(relURL.Type, "application/json") || strings.HasPrefix(relURL.Type, "application/xml") || strings.HasPrefix(relURL.Type, "text/xml") || strings.HasPrefix(relURL.Type, "application/rss+xml") || strings.HasPrefix(relURL.Type, "application/atom+xml") {
feedResp, err := cachingFetch(alt)
if err != nil {
log.Printf("Error in fetch of %s - %v\n", alt, err)
continue
}
// FIXME: don't defer in for loop (possible memory leak)
defer feedResp.Body.Close()
parsedFeed, err := fetch.FeedHeader(cachingFetch, alt, feedResp.Header.Get("Content-Type"), feedResp.Body)
if err != nil {
log.Printf("Error in parse of %s - %v\n", alt, err)
continue
}
feeds = append(feeds, parsedFeed)
}
}
log.Printf("error while finding feeds: %v", err)
}
feeds = append(feeds, foundFeeds...)
}
return feeds, nil