Also Fetch JSON feed, RSS, and Atom

This commit is contained in:
Peter Stuifzand 2018-04-07 20:50:07 +02:00
parent cf5d4c0a49
commit 925e914d01
3 changed files with 158 additions and 71 deletions

View File

@ -22,10 +22,12 @@ import (
"encoding/hex" "encoding/hex"
"encoding/json" "encoding/json"
"fmt" "fmt"
"io/ioutil"
"log" "log"
"net/http" "net/http"
"net/url" "net/url"
"os" "os"
"rss"
"strings" "strings"
"time" "time"
@ -49,69 +51,126 @@ func init() {
func (b *memoryBackend) Fetch3(channel, fetchURL string) error { func (b *memoryBackend) Fetch3(channel, fetchURL string) error {
log.Printf("Fetching channel=%s fetchURL=%s\n", channel, fetchURL) log.Printf("Fetching channel=%s fetchURL=%s\n", channel, fetchURL)
md, err := Fetch2(fetchURL) resp, err := Fetch2(fetchURL)
if err != nil { if err != nil {
return err return err
} }
defer resp.Body.Close()
u, _ := url.Parse(fetchURL)
results := simplifyMicroformatData(md) contentType := resp.Header.Get("Content-Type")
if strings.HasPrefix(contentType, "text/html") {
found := -1 data := microformats.Parse(resp.Body, u)
for { results := simplifyMicroformatData(data)
for i, r := range results { found := -1
if r["type"] == "card" { for {
found = i for i, r := range results {
break if r["type"] == "card" {
} found = i
} break
if found >= 0 {
card := results[found]
results = append(results[:found], results[found+1:]...)
for i := range results {
if results[i]["type"] == "entry" && results[i]["author"] == card["url"] {
results[i]["author"] = card
} }
} }
found = -1 if found >= 0 {
continue card := results[found]
results = append(results[:found], results[found+1:]...)
for i := range results {
if results[i]["type"] == "entry" && results[i]["author"] == card["url"] {
results[i]["author"] = card
}
}
found = -1
continue
}
break
} }
break
}
for i, r := range results { for i, r := range results {
if as, ok := r["author"].(string); ok { if as, ok := r["author"].(string); ok {
if r["type"] == "entry" && strings.HasPrefix(as, "http") { if r["type"] == "entry" && strings.HasPrefix(as, "http") {
md, _ := Fetch2(as) resp, err := Fetch2(fetchURL)
author := simplifyMicroformatData(md) if err != nil {
for _, a := range author { return err
if a["type"] == "card" { }
results[i]["author"] = a defer resp.Body.Close()
break u, _ := url.Parse(fetchURL)
md := microformats.Parse(resp.Body, u)
author := simplifyMicroformatData(md)
for _, a := range author {
if a["type"] == "card" {
results[i]["author"] = a
break
}
} }
} }
} }
} }
}
// Filter items with "published" date // Filter items with "published" date
for _, r := range results { for _, r := range results {
r["_is_read"] = b.wasRead(channel, r) r["_is_read"] = b.wasRead(channel, r)
if r["_is_read"].(bool) { if r["_is_read"].(bool) {
continue continue
} }
if uid, e := r["uid"]; e { if uid, e := r["uid"]; e {
r["_id"] = hex.EncodeToString([]byte(uid.(string))) r["_id"] = hex.EncodeToString([]byte(uid.(string)))
} else if uid, e := r["url"]; e { } else if uid, e := r["url"]; e {
r["_id"] = hex.EncodeToString([]byte(uid.(string))) r["_id"] = hex.EncodeToString([]byte(uid.(string)))
} else { } else {
r["_id"] = "" // generate random value r["_id"] = "" // generate random value
} }
if _, e := r["published"]; e { if _, e := r["published"]; e {
item := mapToItem(r) item := mapToItem(r)
b.channelAddItem(channel, item)
}
}
} else if strings.HasPrefix(contentType, "application/json") { // json feed?
var feed JSONFeed
dec := json.NewDecoder(resp.Body)
err = dec.Decode(&feed)
if err != nil {
log.Printf("Error while parsing json feed: %s\n", err)
return err
}
for _, feedItem := range feed.Items {
var item microsub.Item
item.Name = feedItem.Title
item.Content.HTML = feedItem.ContentHTML
item.Content.Text = feedItem.ContentText
item.URL = feedItem.URL
item.Summary = []string{feedItem.Summary}
item.Id = feedItem.ID
item.Published = feedItem.DatePublished
b.channelAddItem(channel, item) b.channelAddItem(channel, item)
} }
} else if strings.HasPrefix(contentType, "application/rss+xml") || strings.HasPrefix(contentType, "application/atom+xml") {
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
log.Printf("Error while parsing rss/atom feed: %s\n", err)
return err
}
feed, err := rss.Parse(body)
if err != nil {
log.Printf("Error while parsing rss/atom feed: %s\n", err)
return err
}
for _, feedItem := range feed.Items {
var item microsub.Item
item.Name = feedItem.Title
item.Content.HTML = feedItem.Content
item.URL = feedItem.Link
item.Summary = []string{feedItem.Summary}
item.Id = feedItem.ID
item.Published = feedItem.Date.Format(time.RFC822Z)
b.channelAddItem(channel, item)
}
} else {
log.Printf("Unknown Content-Type: %s\n", contentType)
} }
return nil return nil
} }
@ -154,7 +213,7 @@ type redisItem struct {
} }
// Fetch2 fetches stuff // Fetch2 fetches stuff
func Fetch2(fetchURL string) (*microformats.Data, error) { func Fetch2(fetchURL string) (*http.Response, error) {
if !strings.HasPrefix(fetchURL, "http") { if !strings.HasPrefix(fetchURL, "http") {
return nil, fmt.Errorf("error parsing %s as url", fetchURL) return nil, fmt.Errorf("error parsing %s as url", fetchURL)
} }
@ -164,31 +223,12 @@ func Fetch2(fetchURL string) (*microformats.Data, error) {
return nil, fmt.Errorf("error parsing %s as url: %s", fetchURL, err) return nil, fmt.Errorf("error parsing %s as url: %s", fetchURL, err)
} }
if data, e := cache[u.String()]; e {
if data.created.After(time.Now().Add(time.Minute * -10)) {
log.Printf("HIT %s - %s\n", u.String(), time.Now().Sub(data.created).String())
return data.item, nil
} else {
log.Printf("EXPIRE %s\n", u.String())
delete(cache, u.String())
}
} else {
log.Printf("MISS %s\n", u.String())
}
resp, err := http.Get(u.String()) resp, err := http.Get(u.String())
if err != nil { if err != nil {
return nil, fmt.Errorf("error while fetching %s: %s", u, err) return nil, fmt.Errorf("error while fetching %s: %s", u, err)
} }
if !strings.HasPrefix(resp.Header.Get("Content-Type"), "text/html") { return resp, err
return nil, fmt.Errorf("Content Type of %s = %s", fetchURL, resp.Header.Get("Content-Type"))
}
defer resp.Body.Close()
data := microformats.Parse(resp.Body, u)
cache[u.String()] = cacheItem{data, time.Now()}
return data, nil
} }
func Fetch(fetchURL string) []microsub.Item { func Fetch(fetchURL string) []microsub.Item {

30
cmd/server/jsonfeed.go Normal file
View File

@ -0,0 +1,30 @@
package main
// JSONFeedAttachment is one element of a feed item's "attachments" array.
// Fields tagged omitempty are left out of marshalled output when they hold
// their zero value.
type JSONFeedAttachment struct {
// URL maps to the "url" key.
URL string `json:"url"`
// MimeType maps to the "mime_type" key.
MimeType string `json:"mime_type"`
// Title maps to the optional "title" key.
Title string `json:"title,omitempty"`
// SizeInBytes maps to the optional "size_in_bytes" key.
SizeInBytes int `json:"size_in_bytes,omitempty"`
// DurationInSeconds maps to the optional "duration_in_seconds" key.
// NOTE(review): declared as int, so a fractional duration in the JSON
// would fail to decode — confirm feeds only send whole seconds.
DurationInSeconds int `json:"duration_in_seconds,omitempty"`
}
// JSONFeedItem is one element of a JSONFeed's "items" array.
// Only "id" is always emitted when marshalling; every other key is tagged
// omitempty and is dropped when the field holds its zero value.
type JSONFeedItem struct {
// ID maps to the "id" key.
ID string `json:"id"`
// ContentText maps to the "content_text" key.
ContentText string `json:"content_text,omitempty"`
// ContentHTML maps to the "content_html" key.
ContentHTML string `json:"content_html,omitempty"`
// Summary maps to the "summary" key.
Summary string `json:"summary,omitempty"`
// Title maps to the "title" key.
Title string `json:"title,omitempty"`
// URL maps to the "url" key.
URL string `json:"url,omitempty"`
// ExternalURL maps to the "external_url" key.
ExternalURL string `json:"external_url,omitempty"`
// DatePublished maps to the "date_published" key and is kept as the raw
// string from the feed; it is not parsed into a time.Time here.
// NOTE(review): presumably an RFC 3339 timestamp — confirm against the
// JSON Feed spec before parsing or comparing it.
DatePublished string `json:"date_published,omitempty"`
// Tags maps to the "tags" key.
Tags []string `json:"tags,omitempty"`
// Attachments maps to the "attachments" key.
Attachments []JSONFeedAttachment `json:"attachments,omitempty"`
}
// JSONFeed is the top-level object of a JSON feed document.
// None of its keys are tagged omitempty, so all five are always present in
// marshalled output, even when empty.
type JSONFeed struct {
// Version maps to the "version" key.
Version string `json:"version"`
// Title maps to the "title" key.
Title string `json:"title"`
// HomePageURL maps to the "home_page_url" key.
HomePageURL string `json:"home_page_url"`
// FeedURL maps to the "feed_url" key.
FeedURL string `json:"feed_url"`
// Items maps to the "items" key and holds the feed's entries.
Items []JSONFeedItem `json:"items"`
}

View File

@ -31,6 +31,7 @@ import (
"github.com/garyburd/redigo/redis" "github.com/garyburd/redigo/redis"
"github.com/pstuifzand/microsub-server/microsub" "github.com/pstuifzand/microsub-server/microsub"
"willnorris.com/go/microformats"
) )
type memoryBackend struct { type memoryBackend struct {
@ -147,8 +148,8 @@ func mapToAuthor(result map[string]interface{}) microsub.Author {
if name, e := result["name"]; e { if name, e := result["name"]; e {
item.Name = name.(string) item.Name = name.(string)
} }
if url, e := result["url"]; e { if u, e := result["url"]; e {
item.URL = url.(string) item.URL = u.(string)
} }
if photo, e := result["photo"]; e { if photo, e := result["photo"]; e {
item.Photo = photo.(string) item.Photo = photo.(string)
@ -394,7 +395,13 @@ func (b *memoryBackend) Search(query string) []microsub.Feed {
feeds := []microsub.Feed{} feeds := []microsub.Feed{}
for _, u := range urls { for _, u := range urls {
md, err := Fetch2(u) resp, err := Fetch2(u)
if err != nil {
log.Printf("Error while fetching %s: %v\n", u, err)
continue
}
fetchUrl, err := url.Parse(u)
md := microformats.Parse(resp.Body, fetchUrl)
if err != nil { if err != nil {
log.Printf("Error while fetching %s: %v\n", u, err) log.Printf("Error while fetching %s: %v\n", u, err)
continue continue
@ -421,7 +428,17 @@ func (b *memoryBackend) Search(query string) []microsub.Feed {
} }
func (b *memoryBackend) PreviewURL(previewURL string) microsub.Timeline { func (b *memoryBackend) PreviewURL(previewURL string) microsub.Timeline {
md, err := Fetch2(previewURL) resp, err := Fetch2(previewURL)
if err != nil {
log.Printf("Error while fetching %s: %v\n", previewURL, err)
return microsub.Timeline{}
}
fetchUrl, err := url.Parse(previewURL)
md := microformats.Parse(resp.Body, fetchUrl)
if err != nil {
log.Printf("Error while fetching %s: %v\n", previewURL, err)
return microsub.Timeline{}
}
if err != nil { if err != nil {
log.Println(err) log.Println(err)
return microsub.Timeline{} return microsub.Timeline{}