// fetch url in different ways /* Microsub server Copyright (C) 2018 Peter Stuifzand This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ package main import ( "encoding/hex" "encoding/json" "fmt" "log" "net/http" "net/url" "os" "strings" "time" "github.com/garyburd/redigo/redis" "github.com/pstuifzand/microsub-server/microsub" "willnorris.com/go/microformats" ) type cacheItem struct { item *microformats.Data created time.Time } var cache map[string]cacheItem func init() { cache = make(map[string]cacheItem) } // Fetch3 fills stuff func (b *memoryBackend) Fetch3(channel, fetchURL string) error { log.Printf("Fetching channel=%s fetchURL=%s\n", channel, fetchURL) channelKey := fmt.Sprintf("channel:%s:posts", channel) md, err := Fetch2(fetchURL) if err != nil { return err } results := simplifyMicroformatData(md) found := -1 for { for i, r := range results { if r["type"] == "card" { found = i break } } if found >= 0 { card := results[found] results = append(results[:found], results[found+1:]...) for i := range results { if results[i]["type"] == "entry" && results[i]["author"] == card["url"] { results[i]["author"] = card } } found = -1 continue } break } for i, r := range results { if as, ok := r["author"].(string); ok { if r["type"] == "entry" && strings.HasPrefix(as, "http") { md, _ := Fetch2(as) author := simplifyMicroformatData(md) for _, a := range author { if a["type"] == "card" { results[i]["author"] = a break } } } } } // Filter items with "published" date for _, r := range results { r["_is_read"] = b.wasRead(channel, r) if r["_is_read"].(bool) { continue } if uid, e := r["uid"]; e { r["_id"] = hex.EncodeToString([]byte(uid.(string))) } else if uid, e := r["url"]; e { r["_id"] = hex.EncodeToString([]byte(uid.(string))) } else { r["_id"] = "" // generate random value } if _, e := r["published"]; e { item := mapToItem(r) // send to redis data, err := json.Marshal(item) if err != nil { log.Printf("error while creating item for redis: %v\n", err) continue } forRedis := redisItem{ Id: item.Id, Published: item.Published, Read: item.Read, Data: data, } itemKey := fmt.Sprintf("item:%s", item.Id) _, err = redis.String(b.Redis.Do("HMSET", redis.Args{}.Add(itemKey).AddFlat(&forRedis)...)) if err != nil { log.Printf("error while writing item for redis: %v\n", err) continue } _, err = b.Redis.Do("SADD", channelKey, itemKey) if err != nil { log.Printf("error while adding item %s to channel %s for redis: %v\n", itemKey, channelKey, err) continue } } } return nil } type redisItem struct { Id string Published string Read bool Data []byte } // Fetch2 fetches stuff func Fetch2(fetchURL string) (*microformats.Data, error) { if !strings.HasPrefix(fetchURL, "http") { return nil, fmt.Errorf("error parsing %s as url", fetchURL) } u, err := url.Parse(fetchURL) if err != nil { return nil, fmt.Errorf("error parsing %s as url: %s", fetchURL, err) } if data, e := cache[u.String()]; e { if data.created.After(time.Now().Add(time.Minute * -10)) { log.Printf("HIT %s - %s\n", u.String(), time.Now().Sub(data.created).String()) return data.item, nil } else { log.Printf("EXPIRE %s\n", u.String()) delete(cache, u.String()) } } else { log.Printf("MISS %s\n", u.String()) } resp, err := http.Get(u.String()) if err != nil { return nil, fmt.Errorf("error while fetching %s: %s", u, err) } if !strings.HasPrefix(resp.Header.Get("Content-Type"), "text/html") { return nil, fmt.Errorf("Content Type of %s = %s", fetchURL, resp.Header.Get("Content-Type")) } defer resp.Body.Close() data := microformats.Parse(resp.Body, u) cache[u.String()] = cacheItem{data, time.Now()} return data, nil } func Fetch(fetchURL string) []microsub.Item { result := []microsub.Item{} if !strings.HasPrefix(fetchURL, "http") { return result } u, err := url.Parse(fetchURL) if err != nil { log.Printf("error parsing %s as url: %s", fetchURL, err) return result } resp, err := http.Get(u.String()) if err != nil { log.Printf("error while fetching %s: %s", u, err) return result } if !strings.HasPrefix(resp.Header.Get("Content-Type"), "text/html") { log.Printf("Content Type of %s = %s", fetchURL, resp.Header.Get("Content-Type")) return result } defer resp.Body.Close() data := microformats.Parse(resp.Body, u) jw := json.NewEncoder(os.Stdout) jw.SetIndent("", " ") jw.Encode(data) author := microsub.Author{} for _, item := range data.Items { if item.Type[0] == "h-feed" { for _, child := range item.Children { previewItem := convertMfToItem(child) result = append(result, previewItem) } } else if item.Type[0] == "h-card" { mf := item author.Filled = true author.Type = "card" for prop, value := range mf.Properties { switch prop { case "url": author.URL = value[0].(string) break case "name": author.Name = value[0].(string) break case "photo": author.Photo = value[0].(string) break default: fmt.Printf("prop name not implemented for author: %s with value %#v\n", prop, value) break } } } else if item.Type[0] == "h-entry" { previewItem := convertMfToItem(item) result = append(result, previewItem) } } for i, item := range result { if !item.Author.Filled { result[i].Author = author } } return result } func convertMfToItem(mf *microformats.Microformat) microsub.Item { item := microsub.Item{} item.Type = mf.Type[0] for prop, value := range mf.Properties { switch prop { case "published": item.Published = value[0].(string) break case "url": item.URL = value[0].(string) break case "name": item.Name = value[0].(string) break case "latitude": item.Latitude = value[0].(string) break case "longitude": item.Longitude = value[0].(string) break case "like-of": for _, v := range value { item.LikeOf = append(item.LikeOf, v.(string)) } break case "bookmark-of": for _, v := range value { item.BookmarkOf = append(item.BookmarkOf, v.(string)) } break case "in-reply-to": for _, v := range value { item.InReplyTo = append(item.InReplyTo, v.(string)) } break case "summary": if content, ok := value[0].(map[string]interface{}); ok { item.Content.HTML = content["html"].(string) item.Content.Text = content["value"].(string) } else if content, ok := value[0].(string); ok { item.Content.Text = content } break case "photo": for _, v := range value { item.Photo = append(item.Photo, v.(string)) } break case "category": for _, v := range value { item.Category = append(item.Category, v.(string)) } break case "content": if content, ok := value[0].(map[string]interface{}); ok { item.Content.HTML = content["html"].(string) item.Content.Text = content["value"].(string) } else if content, ok := value[0].(string); ok { item.Content.Text = content } break default: fmt.Printf("prop name not implemented: %s with value %#v\n", prop, value) break } } if item.Name == strings.TrimSpace(item.Content.Text) { item.Name = "" } // TODO: for like name is the field that is set if item.Content.HTML == "" && len(item.LikeOf) > 0 { item.Name = "" } fmt.Printf("%#v\n", item) return item }