342 lines
8.6 KiB
Go
342 lines
8.6 KiB
Go
package rss
|
|
|
|
import (
|
|
"bytes"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"io/ioutil"
|
|
"net"
|
|
"net/http"
|
|
"strings"
|
|
"text/tabwriter"
|
|
"time"
|
|
)
|
|
|
|
// Parse RSS or Atom data.
|
|
func Parse(data []byte) (*Feed, error) {
|
|
|
|
if strings.Contains(string(data), "<rss") {
|
|
if debug {
|
|
fmt.Println("[i] Parsing as RSS 2.0")
|
|
}
|
|
return parseRSS2(data)
|
|
} else if strings.Contains(string(data), "xmlns=\"http://purl.org/rss/1.0/\"") {
|
|
if debug {
|
|
fmt.Println("[i] Parsing as RSS 1.0")
|
|
}
|
|
return parseRSS1(data)
|
|
} else {
|
|
if debug {
|
|
fmt.Println("[i] Parsing as Atom")
|
|
}
|
|
return parseAtom(data)
|
|
}
|
|
}
|
|
|
|
// FetchFunc retrieves the document at url and returns the raw HTTP response.
//
// It is the hook through which callers control how a feed is downloaded.
// FetchByFunc reads and closes the body of the response it receives.
type FetchFunc func(url string) (*http.Response, error)
|
|
|
|
// DefaultFetchFunc issues a plain GET request for url through
// http.DefaultClient. Feed.Update falls back to it when a feed has no
// FetchFunc of its own.
var DefaultFetchFunc = func(url string) (*http.Response, error) {
	return http.DefaultClient.Get(url)
}
|
|
|
|
// Fetch downloads and parses the RSS feed at the given URL
|
|
func Fetch(url string) (*Feed, error) {
|
|
return FetchByFunc(DefaultFetchFunc, url)
|
|
}
|
|
|
|
// FetchByClient uses a http.Client to fetch a URL.
|
|
func FetchByClient(url string, client *http.Client) (*Feed, error) {
|
|
fetchFunc := func(url string) (resp *http.Response, err error) {
|
|
return client.Get(url)
|
|
}
|
|
return FetchByFunc(fetchFunc, url)
|
|
}
|
|
|
|
// FetchByFunc uses a func to fetch a URL.
|
|
func FetchByFunc(fetchFunc FetchFunc, url string) (*Feed, error) {
|
|
resp, err := fetchFunc(url)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
body, err := ioutil.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
out, err := Parse(body)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if out.Link == "" {
|
|
out.Link = url
|
|
}
|
|
|
|
out.UpdateURL = url
|
|
out.FetchFunc = fetchFunc
|
|
|
|
return out, nil
|
|
}
|
|
|
|
// Feed is the top-level structure.
|
|
type Feed struct {
|
|
Nickname string `json:"nickname"` // This is not set by the package, but could be helpful.
|
|
Title string `json:"title"`
|
|
Description string `json:"description"`
|
|
Link string `json:"link"` // Link to the creator's website.
|
|
UpdateURL string `json:"updateurl"` // URL of the feed itself.
|
|
HubURL string `json:"huburl"` // URL of the WebSub hub
|
|
Image *Image `json:"image"` // Feed icon.
|
|
Items []*Item `json:"items"`
|
|
ItemMap map[string]struct{} `json:"itemmap"` // Used in checking whether an item has been seen before.
|
|
Refresh time.Time `json:"refresh"` // Earliest time this feed should next be checked.
|
|
Unread uint32 `json:"unread"` // Number of unread items. Used by aggregators.
|
|
FetchFunc FetchFunc `json:"-"`
|
|
}
|
|
|
|
// refreshError is a string-backed error that satisfies net.Error. It always
// reports itself as temporary and never as a timeout.
type refreshError string

// errUpdateNotReady is returned when an update is attempted before the
// feed's Refresh time has passed.
var errUpdateNotReady = refreshError("not ready to update: too soon to refresh")

// Compile-time proof that refreshError implements net.Error.
var _ net.Error = refreshError("")

// Error returns the message text.
func (e refreshError) Error() string { return string(e) }

// Timeout always reports false.
func (e refreshError) Timeout() bool { return false }

// Temporary always reports true.
func (e refreshError) Temporary() bool { return true }
|
|
|
|
// Update fetches any new items and updates f.
|
|
func (f *Feed) Update() error {
|
|
if f.FetchFunc == nil {
|
|
f.FetchFunc = DefaultFetchFunc
|
|
}
|
|
return f.UpdateByFunc(f.FetchFunc)
|
|
}
|
|
|
|
// UpdateWithContent parses content and updates f
|
|
func (f *Feed) UpdateWithContent(content []byte) error {
|
|
feed, err := Parse(content)
|
|
f.UpdateByFeed(feed)
|
|
return err
|
|
}
|
|
|
|
// UpdateByFeed parses content and updates f
|
|
func (f *Feed) UpdateByFeed(update *Feed) error {
|
|
|
|
// Check that we don't update too often.
|
|
if f.Refresh.After(time.Now()) {
|
|
return errUpdateNotReady
|
|
}
|
|
|
|
if f.UpdateURL == "" {
|
|
return errors.New("feed has no URL")
|
|
}
|
|
|
|
if f.ItemMap == nil {
|
|
f.ItemMap = make(map[string]struct{})
|
|
for _, item := range f.Items {
|
|
if _, ok := f.ItemMap[item.ID]; !ok {
|
|
f.ItemMap[item.ID] = struct{}{}
|
|
}
|
|
}
|
|
}
|
|
|
|
f.Refresh = update.Refresh
|
|
f.Title = update.Title
|
|
f.Description = update.Description
|
|
|
|
for _, item := range update.Items {
|
|
if _, ok := f.ItemMap[item.ID]; !ok {
|
|
f.Items = append(f.Items, item)
|
|
f.ItemMap[item.ID] = struct{}{}
|
|
f.Unread++
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// UpdateByFunc uses a func to update f.
|
|
func (f *Feed) UpdateByFunc(fetchFunc FetchFunc) error {
|
|
|
|
// Check that we don't update too often.
|
|
if f.Refresh.After(time.Now()) {
|
|
return errUpdateNotReady
|
|
}
|
|
|
|
if f.UpdateURL == "" {
|
|
return errors.New("feed has no URL")
|
|
}
|
|
|
|
if f.ItemMap == nil {
|
|
f.ItemMap = make(map[string]struct{})
|
|
for _, item := range f.Items {
|
|
if _, ok := f.ItemMap[item.ID]; !ok {
|
|
f.ItemMap[item.ID] = struct{}{}
|
|
}
|
|
}
|
|
}
|
|
|
|
update, err := FetchByFunc(fetchFunc, f.UpdateURL)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
f.Refresh = update.Refresh
|
|
f.Title = update.Title
|
|
f.Description = update.Description
|
|
|
|
for _, item := range update.Items {
|
|
if _, ok := f.ItemMap[item.ID]; !ok {
|
|
f.Items = append(f.Items, item)
|
|
f.ItemMap[item.ID] = struct{}{}
|
|
f.Unread++
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (f *Feed) String() string {
|
|
buf := new(bytes.Buffer)
|
|
if debug {
|
|
w := tabwriter.NewWriter(buf, 0, 8, 0, '\t', tabwriter.StripEscape)
|
|
fmt.Fprintf(w, "Feed {\n")
|
|
fmt.Fprintf(w, "\xff\t\xffNickname:\t%q\n", f.Nickname)
|
|
fmt.Fprintf(w, "\xff\t\xffTitle:\t%q\n", f.Title)
|
|
fmt.Fprintf(w, "\xff\t\xffDescription:\t%q\n", f.Description)
|
|
fmt.Fprintf(w, "\xff\t\xffLink:\t%q\n", f.Link)
|
|
fmt.Fprintf(w, "\xff\t\xffUpdateURL:\t%q\n", f.UpdateURL)
|
|
fmt.Fprintf(w, "\xff\t\xffImage:\t%q (%s)\n", f.Image.Title, f.Image.URL)
|
|
fmt.Fprintf(w, "\xff\t\xffRefresh:\t%s\n", f.Refresh.Format(DATE))
|
|
fmt.Fprintf(w, "\xff\t\xffUnread:\t%d\n", f.Unread)
|
|
fmt.Fprintf(w, "\xff\t\xffItems:\t(%d) {\n", len(f.Items))
|
|
for _, item := range f.Items {
|
|
fmt.Fprintf(w, "%s\n", item.Format(2))
|
|
}
|
|
fmt.Fprintf(w, "\xff\t\xff}\n}\n")
|
|
w.Flush()
|
|
} else {
|
|
w := buf
|
|
fmt.Fprintf(w, "Feed %q\n", f.Title)
|
|
fmt.Fprintf(w, "\t%q\n", f.Description)
|
|
fmt.Fprintf(w, "\t%q\n", f.Link)
|
|
fmt.Fprintf(w, "\t%s\n", f.Image)
|
|
fmt.Fprintf(w, "\tRefresh at %s\n", f.Refresh.Format(DATE))
|
|
fmt.Fprintf(w, "\tUnread: %d\n", f.Unread)
|
|
fmt.Fprintf(w, "\tItems:\n")
|
|
for _, item := range f.Items {
|
|
fmt.Fprintf(w, "\t%s\n", item.Format(2))
|
|
}
|
|
}
|
|
return buf.String()
|
|
}
|
|
|
|
// Item represents a single story.
|
|
type Item struct {
|
|
Title string `json:"title"`
|
|
Summary string `json:"summary"`
|
|
Content string `json:"content"`
|
|
Category string `json:"category"`
|
|
Link string `json:"link"`
|
|
Date time.Time `json:"date"`
|
|
DateValid bool
|
|
ID string `json:"id"`
|
|
Enclosures []*Enclosure `json:"enclosures"`
|
|
Read bool `json:"read"`
|
|
}
|
|
|
|
func (i *Item) String() string {
|
|
return i.Format(0)
|
|
}
|
|
|
|
// Format formats an item using tabs.
|
|
func (i *Item) Format(indent int) string {
|
|
buf := new(bytes.Buffer)
|
|
single := strings.Repeat("\t", indent)
|
|
double := single + "\t"
|
|
if debug {
|
|
w := tabwriter.NewWriter(buf, 0, 8, 0, '\t', tabwriter.StripEscape)
|
|
fmt.Fprintf(w, "\xff%s\xffItem {\n", single)
|
|
fmt.Fprintf(w, "\xff%s\xffTitle:\t%q\n", double, i.Title)
|
|
fmt.Fprintf(w, "\xff%s\xffSummary:\t%q\n", double, i.Summary)
|
|
fmt.Fprintf(w, "\xff%s\xffCategory:\t%q\n", double, i.Category)
|
|
fmt.Fprintf(w, "\xff%s\xffLink:\t%s\n", double, i.Link)
|
|
fmt.Fprintf(w, "\xff%s\xffDate:\t%s\n", double, i.Date.Format(DATE))
|
|
fmt.Fprintf(w, "\xff%s\xffID:\t%s\n", double, i.ID)
|
|
fmt.Fprintf(w, "\xff%s\xffRead:\t%v\n", double, i.Read)
|
|
fmt.Fprintf(w, "\xff%s\xffContent:\t%q\n", double, i.Content)
|
|
fmt.Fprintf(w, "\xff%s\xff}\n", single)
|
|
w.Flush()
|
|
} else {
|
|
w := buf
|
|
fmt.Fprintf(w, "%sItem %q\n", single, i.Title)
|
|
fmt.Fprintf(w, "%s%q\n", double, i.Link)
|
|
fmt.Fprintf(w, "%s%s\n", double, i.Date.Format(DATE))
|
|
fmt.Fprintf(w, "%s%q\n", double, i.ID)
|
|
fmt.Fprintf(w, "%sRead: %v\n", double, i.Read)
|
|
fmt.Fprintf(w, "%s%q\n", double, i.Content)
|
|
}
|
|
return buf.String()
|
|
}
|
|
|
|
// Enclosure describes a file attached to an item: where it lives, its media
// type and its length.
type Enclosure struct {
	URL    string `json:"url"`
	Type   string `json:"type"`
	Length uint   `json:"length"`
}

// Get downloads the enclosure with http.Get and returns the response body.
// The caller is responsible for closing the returned reader.
//
// A nil receiver or an empty URL yields a "no enclosure" error without any
// request being made.
func (e *Enclosure) Get() (io.ReadCloser, error) {
	if e != nil && e.URL != "" {
		resp, err := http.Get(e.URL)
		if err != nil {
			return nil, err
		}
		return resp.Body, nil
	}
	return nil, errors.New("no enclosure")
}
|
|
|
|
// Image describes a feed's icon: its title, location and pixel dimensions.
type Image struct {
	Title  string `json:"title"`
	URL    string `json:"url"`
	Height uint32 `json:"height"`
	Width  uint32 `json:"width"`
}

// Get downloads the image with http.Get and returns the response body. The
// caller is responsible for closing the returned reader.
//
// A nil receiver or an empty URL yields a "no image" error without any
// request being made.
func (i *Image) Get() (io.ReadCloser, error) {
	if i == nil || i.URL == "" {
		return nil, errors.New("no image")
	}

	res, err := http.Get(i.URL)
	if err != nil {
		return nil, err
	}

	return res.Body, nil
}

// String returns a short description of the image containing its quoted
// title. Like Get it tolerates a nil receiver, for which it returns "<nil>",
// the same text fmt prints for a nil pointer.
func (i *Image) String() string {
	if i == nil {
		return "<nil>"
	}
	return fmt.Sprintf("Image %q", i.Title)
}
|