Parse HubURL from RSS 2.0 feeds

This commit is contained in:
Peter Stuifzand 2018-05-23 22:38:24 +02:00
parent ddd850db4d
commit 6b66e5d548
3 changed files with 68 additions and 7 deletions

View File

@ -2,24 +2,81 @@ package websub
import (
"fmt"
"io/ioutil"
"net/http"
"net/url"
"strings"
"linkheader"
"rss"
)
// Fetcher is implemented by types that can return the HTTP response for a
// URL. Fetch takes the URL as a string and returns either the response or
// an error.
type Fetcher interface {
Fetch(url string) (*http.Response, error)
}
// GetHubURL finds the hub URL for topic.
//
// Discovery is attempted in two steps, stopping at the first that succeeds:
// parseLinkHeaders inspects the HTTP Link headers of the topic, then
// parseBodyLinks fetches the topic and looks for a hub in the feed body.
// When both steps fail, the returned error names the topic and includes the
// reason each step failed, so that a transport or parse failure is not
// mistaken for a topic that simply advertises no hub.
func GetHubURL(client *http.Client, topic string) (string, error) {
	hubURL, headerErr := parseLinkHeaders(client, topic)
	if headerErr == nil {
		return hubURL, nil
	}

	hubURL, bodyErr := parseBodyLinks(client, topic)
	if bodyErr == nil {
		return hubURL, nil
	}

	// Keep the original message as the prefix and append both causes.
	return "", fmt.Errorf("No hub url found for topic %s (link headers: %v; body: %v)", topic, headerErr, bodyErr)
}
// isFeedContentType reports whether contentType, the value of a Content-Type
// header, names one of the media types accepted as a feed:
// application/rss+xml, application/atom+xml, application/xml or text/xml.
//
// Media types are case-insensitive, so the value is lower-cased (and
// surrounding whitespace is dropped) before matching. Matching is by prefix,
// so parameters such as "; charset=utf-8" do not prevent a match.
func isFeedContentType(contentType string) bool {
	normalized := strings.ToLower(strings.TrimSpace(contentType))

	feedTypes := []string{
		"application/rss+xml",
		"application/atom+xml",
		"application/xml",
		"text/xml",
	}
	for _, feedType := range feedTypes {
		if strings.HasPrefix(normalized, feedType) {
			return true
		}
	}
	return false
}
// parseBodyLinks fetches topic with a GET request and looks for a hub URL in
// the response body.
//
// The body is only parsed when the response has a 2xx status and a
// Content-Type accepted by isFeedContentType; it is then handed to rss.Parse
// and the HubURL of the resulting feed is returned. An error is returned
// when the request fails, the status is not 2xx, the content type is not a
// feed type, the body cannot be read or parsed, or the feed has no hub URL.
func parseBodyLinks(client *http.Client, topic string) (string, error) {
	resp, err := client.Get(topic)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	// client.Get does not report non-2xx statuses as errors, and an error
	// page may be served with an XML content type; never read a hub from it.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return "", fmt.Errorf("Unexpected response status for topic %s: %s", topic, resp.Status)
	}

	contentType := resp.Header.Get("Content-Type")
	if !isFeedContentType(contentType) {
		return "", fmt.Errorf("Unknown content type of response: %s", contentType)
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}

	feed, err := rss.Parse(body)
	if err != nil {
		return "", err
	}

	if feed.HubURL == "" {
		return "", fmt.Errorf("No hub url found in RSS feed")
	}
	return feed.HubURL, nil
}
func parseLinkHeaders(client *http.Client, topic string) (string, error) {
resp, err := client.Head(topic)
if err != nil {
return "", err
}
defer resp.Body.Close()
if headers, e := resp.Header["Link"]; e {
@ -31,7 +88,7 @@ func GetHubURL(client *http.Client, topic string) (string, error) {
}
}
return "", nil
return "", fmt.Errorf("No hub url found in HTTP Link headers")
}
// Subscribe subscribes topicURL on hubURL

1
vendor/rss/rss.go vendored
View File

@ -91,6 +91,7 @@ type Feed struct {
Description string `json:"description"`
Link string `json:"link"` // Link to the creator's website.
UpdateURL string `json:"updateurl"` // URL of the feed itself.
HubURL string `json:"huburl"` // URL of the WebSub hub
Image *Image `json:"image"` // Feed icon.
Items []*Item `json:"items"`
ItemMap map[string]struct{} `json:"itemmap"` // Used in checking whether an item has been seen before.

View File

@ -30,9 +30,12 @@ func parseRSS2(data []byte) (*Feed, error) {
for _, link := range channel.Link {
if link.Rel == "" && link.Type == "" && link.Href == "" && link.Chardata != "" {
out.Link = link.Chardata
break
}
if link.Rel == "hub" {
out.HubURL = link.Href
}
}
out.Image = channel.Image.Image()
if channel.MinsToLive != 0 {
sort.Ints(channel.SkipHours)