Parse HubURL from RSS 2.0 feeds

This commit is contained in:
Peter Stuifzand 2018-05-23 22:38:24 +02:00
parent ddd850db4d
commit 6b66e5d548
3 changed files with 68 additions and 7 deletions

View File

@ -2,24 +2,81 @@ package websub
import ( import (
"fmt" "fmt"
"io/ioutil"
"net/http" "net/http"
"net/url" "net/url"
"strings"
"linkheader" "linkheader"
"rss"
) )
// Fetcher returns the HTTP response for a URL.
type Fetcher interface {
	// Fetch takes a URL string and returns the corresponding HTTP response,
	// or an error.
	Fetch(url string) (*http.Response, error)
}
// GetHubURL finds the HubURL for topic // GetHubURL finds the HubURL for topic
func GetHubURL(client *http.Client, topic string) (string, error) { func GetHubURL(client *http.Client, topic string) (string, error) {
hubURL, err := parseLinkHeaders(client, topic)
if err == nil {
return hubURL, err
}
hubURL, err = parseBodyLinks(client, topic)
if err == nil {
return hubURL, err
}
return "", fmt.Errorf("No hub url found for topic %s", topic)
}
// isFeedContentType reports whether contentType, the value of an HTTP
// Content-Type header, names one of the media types treated as a feed:
// application/rss+xml, application/atom+xml, application/xml or text/xml.
//
// Media type names are case-insensitive, so the value is lower-cased and
// stripped of surrounding whitespace before it is compared. Matching is done
// by prefix so that trailing parameters such as "; charset=utf-8" are
// accepted.
func isFeedContentType(contentType string) bool {
	normalized := strings.ToLower(strings.TrimSpace(contentType))

	for _, prefix := range []string{
		"application/rss+xml",
		"application/atom+xml",
		"application/xml",
		"text/xml",
	} {
		if strings.HasPrefix(normalized, prefix) {
			return true
		}
	}
	return false
}
// parseBodyLinks fetches topic with a GET request and looks for the hub URL
// in the response body.
//
// Only a successful (2xx) response whose Content-Type is accepted by
// isFeedContentType is parsed: the body is read in full, handed to rss.Parse,
// and the HubURL of the resulting feed is returned.
//
// An error is returned when the request fails, the status is not 2xx, the
// content type is not a feed type, the body cannot be read or parsed, or the
// parsed feed carries no hub URL.
func parseBodyLinks(client *http.Client, topic string) (string, error) {
	resp, err := client.Get(topic)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	// client.Get does not report non-2xx statuses as errors, and an error page
	// may be served with an XML content type; never treat it as the feed.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return "", fmt.Errorf("Unexpected status %s for topic %s", resp.Status, topic)
	}

	contentType := resp.Header.Get("Content-Type")
	if !isFeedContentType(contentType) {
		return "", fmt.Errorf("Unknown content type of response: %s", contentType)
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}

	feed, err := rss.Parse(body)
	if err != nil {
		return "", err
	}

	if feed.HubURL == "" {
		return "", fmt.Errorf("No hub url found in RSS feed")
	}
	return feed.HubURL, nil
}
func parseLinkHeaders(client *http.Client, topic string) (string, error) {
resp, err := client.Head(topic) resp, err := client.Head(topic)
if err != nil { if err != nil {
return "", err return "", err
} }
defer resp.Body.Close() defer resp.Body.Close()
if headers, e := resp.Header["Link"]; e { if headers, e := resp.Header["Link"]; e {
@ -31,7 +88,7 @@ func GetHubURL(client *http.Client, topic string) (string, error) {
} }
} }
return "", nil return "", fmt.Errorf("No hub url found in HTTP Link headers")
} }
// Subscribe subscribes topicURL on hubURL // Subscribe subscribes topicURL on hubURL

1
vendor/rss/rss.go vendored
View File

@ -91,6 +91,7 @@ type Feed struct {
Description string `json:"description"` Description string `json:"description"`
Link string `json:"link"` // Link to the creator's website. Link string `json:"link"` // Link to the creator's website.
UpdateURL string `json:"updateurl"` // URL of the feed itself. UpdateURL string `json:"updateurl"` // URL of the feed itself.
HubURL string `json:"huburl"` // URL of the WebSub hub
Image *Image `json:"image"` // Feed icon. Image *Image `json:"image"` // Feed icon.
Items []*Item `json:"items"` Items []*Item `json:"items"`
ItemMap map[string]struct{} `json:"itemmap"` // Used in checking whether an item has been seen before. ItemMap map[string]struct{} `json:"itemmap"` // Used in checking whether an item has been seen before.

View File

@ -30,9 +30,12 @@ func parseRSS2(data []byte) (*Feed, error) {
for _, link := range channel.Link { for _, link := range channel.Link {
if link.Rel == "" && link.Type == "" && link.Href == "" && link.Chardata != "" { if link.Rel == "" && link.Type == "" && link.Href == "" && link.Chardata != "" {
out.Link = link.Chardata out.Link = link.Chardata
break }
if link.Rel == "hub" {
out.HubURL = link.Href
} }
} }
out.Image = channel.Image.Image() out.Image = channel.Image.Image()
if channel.MinsToLive != 0 { if channel.MinsToLive != 0 {
sort.Ints(channel.SkipHours) sort.Ints(channel.SkipHours)