2018-04-06 23:29:15 +00:00
|
|
|
package rss
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"encoding/xml"
|
|
|
|
"fmt"
|
|
|
|
"sort"
|
|
|
|
"time"
|
2022-04-16 13:12:58 +00:00
|
|
|
|
|
|
|
"golang.org/x/text/cases"
|
|
|
|
"golang.org/x/text/language"
|
2018-04-06 23:29:15 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
func parseRSS2(data []byte) (*Feed, error) {
|
|
|
|
warnings := false
|
|
|
|
feed := rss2_0Feed{}
|
|
|
|
p := xml.NewDecoder(bytes.NewReader(data))
|
|
|
|
p.CharsetReader = charsetReader
|
|
|
|
err := p.Decode(&feed)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if feed.Channel == nil {
|
|
|
|
return nil, fmt.Errorf("no channel found in %q", string(data))
|
|
|
|
}
|
|
|
|
|
|
|
|
channel := feed.Channel
|
|
|
|
|
|
|
|
out := new(Feed)
|
|
|
|
out.Title = channel.Title
|
|
|
|
out.Description = channel.Description
|
|
|
|
for _, link := range channel.Link {
|
|
|
|
if link.Rel == "" && link.Type == "" && link.Href == "" && link.Chardata != "" {
|
|
|
|
out.Link = link.Chardata
|
2018-05-23 20:38:24 +00:00
|
|
|
}
|
|
|
|
if link.Rel == "hub" {
|
|
|
|
out.HubURL = link.Href
|
2018-04-06 23:29:15 +00:00
|
|
|
}
|
|
|
|
}
|
2018-05-23 20:38:24 +00:00
|
|
|
|
2018-04-06 23:29:15 +00:00
|
|
|
out.Image = channel.Image.Image()
|
|
|
|
if channel.MinsToLive != 0 {
|
2022-04-16 13:12:58 +00:00
|
|
|
titleCaser := cases.Title(language.English)
|
2018-04-06 23:29:15 +00:00
|
|
|
sort.Ints(channel.SkipHours)
|
|
|
|
next := time.Now().Add(time.Duration(channel.MinsToLive) * time.Minute)
|
|
|
|
for _, hour := range channel.SkipHours {
|
|
|
|
if hour == next.Hour() {
|
|
|
|
next.Add(time.Duration(60-next.Minute()) * time.Minute)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
trying := true
|
|
|
|
for trying {
|
|
|
|
trying = false
|
|
|
|
for _, day := range channel.SkipDays {
|
2022-04-16 13:12:58 +00:00
|
|
|
if titleCaser.String(day) == next.Weekday().String() {
|
2018-04-06 23:29:15 +00:00
|
|
|
next.Add(time.Duration(24-next.Hour()) * time.Hour)
|
|
|
|
trying = true
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
out.Refresh = next
|
|
|
|
}
|
|
|
|
|
|
|
|
if out.Refresh.IsZero() {
|
|
|
|
out.Refresh = time.Now().Add(10 * time.Minute)
|
|
|
|
}
|
|
|
|
|
|
|
|
out.Items = make([]*Item, 0, len(channel.Items))
|
|
|
|
out.ItemMap = make(map[string]struct{})
|
|
|
|
|
|
|
|
// Process items.
|
|
|
|
for _, item := range channel.Items {
|
|
|
|
|
|
|
|
if item.ID == "" {
|
|
|
|
if item.Link == "" {
|
|
|
|
if debug {
|
|
|
|
fmt.Printf("[w] Item %q has no ID or link and will be ignored.\n", item.Title)
|
|
|
|
fmt.Printf("[w] %#v\n", item)
|
|
|
|
}
|
|
|
|
warnings = true
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
item.ID = item.Link
|
|
|
|
}
|
|
|
|
|
|
|
|
// Skip items already known.
|
|
|
|
if _, ok := out.ItemMap[item.ID]; ok {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
next := new(Item)
|
|
|
|
next.Title = item.Title
|
|
|
|
next.Summary = item.Description
|
|
|
|
next.Content = item.Content
|
|
|
|
next.Category = item.Category
|
|
|
|
next.Link = item.Link
|
|
|
|
if item.Date != "" {
|
|
|
|
next.Date, err = parseTime(item.Date)
|
|
|
|
if err == nil {
|
|
|
|
item.DateValid = true
|
|
|
|
}
|
|
|
|
} else if item.PubDate != "" {
|
|
|
|
next.Date, err = parseTime(item.PubDate)
|
|
|
|
if err == nil {
|
|
|
|
item.DateValid = true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
next.ID = item.ID
|
|
|
|
if len(item.Enclosures) > 0 {
|
|
|
|
next.Enclosures = make([]*Enclosure, len(item.Enclosures))
|
|
|
|
for i := range item.Enclosures {
|
|
|
|
next.Enclosures[i] = item.Enclosures[i].Enclosure()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
next.Read = false
|
|
|
|
|
|
|
|
out.Items = append(out.Items, next)
|
|
|
|
out.ItemMap[next.ID] = struct{}{}
|
|
|
|
out.Unread++
|
|
|
|
}
|
|
|
|
|
|
|
|
if warnings && debug {
|
|
|
|
fmt.Printf("[i] Encountered warnings:\n%s\n", data)
|
|
|
|
}
|
|
|
|
|
|
|
|
return out, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
type rss2_0Feed struct {
|
|
|
|
XMLName xml.Name `xml:"rss"`
|
|
|
|
Channel *rss2_0Channel `xml:"channel"`
|
|
|
|
}
|
|
|
|
|
|
|
|
type rss2_0Channel struct {
|
|
|
|
XMLName xml.Name `xml:"channel"`
|
|
|
|
Title string `xml:"title"`
|
|
|
|
Description string `xml:"description"`
|
|
|
|
Link []rss2_0Link `xml:"link"`
|
|
|
|
Image rss2_0Image `xml:"image"`
|
|
|
|
Items []rss2_0Item `xml:"item"`
|
|
|
|
MinsToLive int `xml:"ttl"`
|
|
|
|
SkipHours []int `xml:"skipHours>hour"`
|
|
|
|
SkipDays []string `xml:"skipDays>day"`
|
|
|
|
}
|
|
|
|
|
|
|
|
// rss2_0Link is the decoding target for a <link> element inside a channel.
//
// Rel, Href and Type are read from the element's attributes of the same
// (lower-case) names; Chardata receives the element's text content.
type rss2_0Link struct {
	Rel      string `xml:"rel,attr"`
	Href     string `xml:"href,attr"`
	Type     string `xml:"type,attr"`
	Chardata string `xml:",chardata"`
}
|
|
|
|
|
|
|
|
type rss2_0Item struct {
|
|
|
|
XMLName xml.Name `xml:"item"`
|
|
|
|
Title string `xml:"title"`
|
|
|
|
Description string `xml:"description"`
|
|
|
|
Content string `xml:"encoded"`
|
|
|
|
Category string `xml:"category"`
|
|
|
|
Link string `xml:"link"`
|
|
|
|
PubDate string `xml:"pubDate"`
|
|
|
|
Date string `xml:"date"`
|
|
|
|
DateValid bool
|
|
|
|
ID string `xml:"guid"`
|
|
|
|
Enclosures []rss2_0Enclosure `xml:"enclosure"`
|
|
|
|
}
|
|
|
|
|
|
|
|
// rss2_0Enclosure is the decoding target for an <enclosure> element. All
// three values are read from attributes: url, type and length.
type rss2_0Enclosure struct {
	XMLName xml.Name `xml:"enclosure"`
	URL     string   `xml:"url,attr"`
	Type    string   `xml:"type,attr"`
	Length  uint     `xml:"length,attr"`
}
|
|
|
|
|
|
|
|
func (r *rss2_0Enclosure) Enclosure() *Enclosure {
|
|
|
|
out := new(Enclosure)
|
|
|
|
out.URL = r.URL
|
|
|
|
out.Type = r.Type
|
|
|
|
out.Length = r.Length
|
|
|
|
return out
|
|
|
|
}
|
|
|
|
|
|
|
|
// rss2_0Image is the decoding target for an <image> element. Title, URL,
// Height and Width are read from the child elements of the same
// (lower-case) names.
type rss2_0Image struct {
	XMLName xml.Name `xml:"image"`
	Title   string   `xml:"title"`
	URL     string   `xml:"url"`
	Height  int      `xml:"height"`
	Width   int      `xml:"width"`
}
|
|
|
|
|
|
|
|
func (i *rss2_0Image) Image() *Image {
|
|
|
|
out := new(Image)
|
|
|
|
out.Title = i.Title
|
|
|
|
out.URL = i.URL
|
|
|
|
out.Height = uint32(i.Height)
|
|
|
|
out.Width = uint32(i.Width)
|
|
|
|
return out
|
|
|
|
}
|