// fetch url in different ways /* ekster - microsub server Copyright (C) 2018 Peter Stuifzand This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ package fetch import ( "encoding/hex" "encoding/json" "io" "io/ioutil" "log" "net/url" "strings" "time" "rss" "p83.nl/go/ekster/pkg/jf2" "p83.nl/go/ekster/pkg/jsonfeed" "p83.nl/go/ekster/pkg/microsub" "willnorris.com/go/microformats" ) func FeedHeader(fetcher Fetcher, fetchURL, contentType string, body io.Reader) (microsub.Feed, error) { log.Printf("ProcessContent %s\n", fetchURL) log.Println("Found " + contentType) feed := microsub.Feed{} u, _ := url.Parse(fetchURL) var card interface{} if strings.HasPrefix(contentType, "text/html") { data := microformats.Parse(body, u) results := jf2.SimplifyMicroformatData(data) found := -1 for i, r := range results { if r["type"] == "card" { found = i break } } if found >= 0 { card = results[found] if as, ok := card.(string); ok { if strings.HasPrefix(as, "http") { resp, err := fetcher.Fetch(fetchURL) if err != nil { return feed, err } defer resp.Body.Close() u, _ := url.Parse(fetchURL) md := microformats.Parse(resp.Body, u) author := jf2.SimplifyMicroformatData(md) for _, a := range author { if a["type"] == "card" { card = a break } } } } // use object } feed.Type = "feed" feed.URL = fetchURL if cardMap, ok := card.(map[string]interface{}); ok { if name, ok := cardMap["name"].(string); ok { feed.Name = name } if name, ok := cardMap["photo"].(string); ok { feed.Photo = name } else if name, ok := cardMap["photo"].([]interface{}); ok { feed.Photo = name[0].(string) } } } else if strings.HasPrefix(contentType, "application/json") { // json feed? var jfeed jsonfeed.Feed dec := json.NewDecoder(body) err := dec.Decode(&jfeed) if err != nil { log.Printf("Error while parsing json feed: %s\n", err) return feed, err } feed.Type = "feed" feed.Name = jfeed.Title if feed.Name == "" { feed.Name = jfeed.Author.Name } feed.URL = jfeed.FeedURL if feed.URL == "" { feed.URL = fetchURL } feed.Photo = jfeed.Icon if feed.Photo == "" { feed.Photo = jfeed.Author.Avatar } feed.Author.Type = "card" feed.Author.Name = jfeed.Author.Name feed.Author.URL = jfeed.Author.URL feed.Author.Photo = jfeed.Author.Avatar } else if strings.HasPrefix(contentType, "text/xml") || strings.HasPrefix(contentType, "application/rss+xml") || strings.HasPrefix(contentType, "application/atom+xml") || strings.HasPrefix(contentType, "application/xml") { body, err := ioutil.ReadAll(body) if err != nil { log.Printf("Error while parsing rss/atom feed: %s\n", err) return feed, err } xfeed, err := rss.Parse(body) if err != nil { log.Printf("Error while parsing rss/atom feed: %s\n", err) return feed, err } feed.Type = "feed" feed.Name = xfeed.Title feed.URL = fetchURL feed.Description = xfeed.Description feed.Photo = xfeed.Image.URL } else { log.Printf("Unknown Content-Type: %s\n", contentType) } log.Println("Found feed: ", feed) return feed, nil } func FeedItems(fetcher Fetcher, fetchURL, contentType string, body io.Reader) ([]microsub.Item, error) { log.Printf("ProcessContent %s\n", fetchURL) log.Println("Found " + contentType) items := []microsub.Item{} u, _ := url.Parse(fetchURL) if strings.HasPrefix(contentType, "text/html") { data := microformats.Parse(body, u) results := jf2.SimplifyMicroformatData(data) found := -1 for { for i, r := range results { if r["type"] == "card" { found = i break } } if found >= 0 { card := results[found] results = append(results[:found], results[found+1:]...) for i := range results { if results[i]["type"] == "entry" && results[i]["author"] == card["url"] { results[i]["author"] = card } } found = -1 continue } break } for i, r := range results { if as, ok := r["author"].(string); ok { if r["type"] == "entry" && strings.HasPrefix(as, "http") { resp, err := fetcher.Fetch(fetchURL) if err != nil { return items, err } defer resp.Body.Close() u, _ := url.Parse(fetchURL) md := microformats.Parse(resp.Body, u) author := jf2.SimplifyMicroformatData(md) for _, a := range author { if a["type"] == "card" { results[i]["author"] = a break } } } } } // Filter items with "published" date for _, r := range results { if uid, e := r["uid"]; e { r["_id"] = hex.EncodeToString([]byte(uid.(string))) } else if uid, e := r["url"]; e { r["_id"] = hex.EncodeToString([]byte(uid.(string))) } else { continue // r["_id"] = "" // generate random value } // mapToItem adds published item := jf2.MapToItem(r) items = append(items, item) } } else if strings.HasPrefix(contentType, "application/json") { // json feed? var feed jsonfeed.Feed dec := json.NewDecoder(body) err := dec.Decode(&feed) if err != nil { log.Printf("Error while parsing json feed: %s\n", err) return items, err } log.Printf("%#v\n", feed) author := µsub.Card{} author.Type = "card" author.Name = feed.Author.Name author.URL = feed.Author.URL author.Photo = feed.Author.Avatar if author.Photo == "" { author.Photo = feed.Icon } for _, feedItem := range feed.Items { var item microsub.Item item.Type = "entry" item.Name = feedItem.Title item.Content = µsub.Content{} item.Content.HTML = feedItem.ContentHTML item.Content.Text = feedItem.ContentText item.URL = feedItem.URL item.Summary = []string{feedItem.Summary} item.ID = hex.EncodeToString([]byte(feedItem.ID)) item.Published = feedItem.DatePublished itemAuthor := µsub.Card{} itemAuthor.Type = "card" itemAuthor.Name = feedItem.Author.Name itemAuthor.URL = feedItem.Author.URL itemAuthor.Photo = feedItem.Author.Avatar if itemAuthor.URL != "" { item.Author = itemAuthor } else { item.Author = author } item.Photo = []string{feedItem.Image} items = append(items, item) } } else if strings.HasPrefix(contentType, "text/xml") || strings.HasPrefix(contentType, "application/rss+xml") || strings.HasPrefix(contentType, "application/atom+xml") || strings.HasPrefix(contentType, "application/xml") { body, err := ioutil.ReadAll(body) if err != nil { log.Printf("Error while parsing rss/atom feed: %s\n", err) return items, err } feed, err := rss.Parse(body) if err != nil { log.Printf("Error while parsing rss/atom feed: %s\n", err) return items, err } for _, feedItem := range feed.Items { var item microsub.Item item.Type = "entry" item.Name = feedItem.Title item.Content = µsub.Content{} if len(feedItem.Content) > 0 { item.Content.HTML = feedItem.Content } else if len(feedItem.Summary) > 0 { item.Content.HTML = feedItem.Summary } item.URL = feedItem.Link if feedItem.ID == "" { item.ID = hex.EncodeToString([]byte(feedItem.Link)) } else { item.ID = hex.EncodeToString([]byte(feedItem.ID)) } itemAuthor := µsub.Card{} itemAuthor.Type = "card" itemAuthor.Name = feed.Title itemAuthor.URL = feed.Link itemAuthor.Photo = feed.Image.URL item.Author = itemAuthor item.Published = feedItem.Date.Format(time.RFC3339) items = append(items, item) } } else { log.Printf("Unknown Content-Type: %s\n", contentType) } for i, v := range items { // Clear type of author, when other fields also aren't set if v.Author != nil && v.Author.Name == "" && v.Author.Photo == "" && v.Author.URL == "" { v.Type = "" items[i] = v } } for _, item := range items { log.Printf("ID=%s Name=%s\n", item.ID, item.Name) log.Printf("Author=%#v\n", item.Author) if item.Content != nil { log.Printf("Text=%s\n", item.Content.Text) log.Printf("HTML=%s\n", item.Content.HTML) } } return items, nil }