// wiki/search.go
/*
* Wiki - A wiki with editor
* Copyright (c) 2021 Peter Stuifzand
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package main

import (
"encoding/json"
"fmt"
"log"
"net/http"
"os"
"strings"
"time"

"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/davecgh/go-spew/spew"
"github.com/iancoleman/strcase"
)
// TODO: http handler
// TODO: index all pages on start
// TODO: reindex all command
// TODO: search(query) command
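// searchHandler answers full-text search queries against the bleve index
// and exposes maintenance endpoints for listing the indexed fields and
// for rebuilding the index from the pages on disk.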
type searchHandler struct {
indexMapping mapping.IndexMapping
searchIndex bleve.Index
}
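// nameLine is a single backreference: the name of the page it comes from,
// that page's title, and the line of text containing the reference.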
type nameLine struct {
Name string `json:"name"`
Title string `json:"title"`
Line string `json:"line"`
}
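// searchObject is the structured form of a page as it is indexed: its
// blocks, backreferences, "key:: value" metadata, links, and dates.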
type searchObject struct {
Title string `json:"title"`
Blocks []string `json:"blocks"`
Refs []nameLine `json:"refs"`
Meta map[string]interface{} `json:"meta"`
Links []ParsedLink `json:"links"`
Dates []time.Time `json:"dates"`
}
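// NewSearchHandler wraps a bleve index in an http.Handler. A typical
// registration, assuming an index opened elsewhere (for example by
// createSearchIndex), might look like:
//
//	h, _ := NewSearchHandler(index)
//	http.Handle("/search/", h)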
func NewSearchHandler(searchIndex bleve.Index) (http.Handler, error) {
return &searchHandler{
searchIndex: searchIndex,
}, nil
}
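// ServeHTTP dispatches on the request: "?fields=1" lists the indexed
// fields, a GET with "?reset=1" renders a reset form, a POST with
// reset=1 rebuilds blocks, backrefs, links, and the search index, and
// any other request runs the "q" query string against the index and
// writes the results as JSON.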
func (s *searchHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
defer r.Body.Close()
sess, err := NewSession(w, r)
if err != nil {
http.Error(w, err.Error(), 500)
return
}
defer func() {
if err := sess.Flush(); err != nil {
log.Println(err)
}
}()
if !sess.LoggedIn {
fmt.Fprint(w, "{}")
return
}
if r.URL.Query().Get("fields") == "1" {
fields, err := s.searchIndex.Fields()
if err != nil {
http.Error(w, err.Error(), 500)
return
}
enc := json.NewEncoder(w)
enc.SetIndent("", " ")
err = enc.Encode(&fields)
if err != nil {
http.Error(w, err.Error(), 500)
return
}
return
} else if r.Method == "GET" && r.URL.Query().Get("reset") == "1" {
w.Header().Add("Content-Type", "text/html")
fmt.Fprint(w, `<form action="/search/" method="post"><input type=hidden name=reset value=1><input type=submit value="Reset Index"></form>`)
return
} else if r.Method == "POST" {
err = r.ParseForm()
if err != nil {
http.Error(w, err.Error(), 500)
return
}
if r.PostForm.Get("reset") == "1" {
var sw stopwatch
sw.Start("full reset")
defer sw.Stop()
refs := make(Refs)
mp := NewFilePages("data", nil)
pages, err := mp.AllPages()
if err != nil {
http.Error(w, err.Error(), 500)
return
}
for _, page := range pages {
err = saveBlocksFromPage("data", page)
if err != nil {
log.Printf("error while processing blocks from page %s: %v", page.Name, err)
continue
}
}
sw.Lap("save blocks from pages")
// Reload all pages
pages, err = mp.AllPages()
if err != nil {
http.Error(w, err.Error(), 500)
return
}
for _, page := range pages {
log.Println("processing", page.Title)
err = processBackrefsForPage(page, refs)
if err != nil {
log.Println("error while processing backrefs: ", err)
continue
}
}
sw.Lap("process backrefs for pages")
err = saveLinks(mp)
if err != nil {
log.Printf("error while saving links: %v", err)
http.Error(w, err.Error(), 500)
return
}
sw.Lap("save links")
err = saveBackrefs("data/backrefs.json", refs)
if err != nil {
log.Printf("error while saving backrefs: %v", err)
http.Error(w, err.Error(), 500)
return
}
sw.Lap("save backrefs")
err = os.RemoveAll("data/_tmp_index")
if err != nil {
log.Printf("error while removing stale temporary index: %v", err)
http.Error(w, err.Error(), 500)
return
}
_, err = createSearchIndex("data", "_tmp_index")
if err != nil {
http.Error(w, err.Error(), 500)
return
}
err = os.Rename("data/_page-index", "data/_page-index-old")
if err != nil {
log.Printf("error while resetting index: %v", err)
http.Error(w, err.Error(), 500)
return
}
err = os.Rename("data/_tmp_index", "data/_page-index")
if err != nil {
log.Printf("error while putting new index in place: %v", err)
http.Error(w, err.Error(), 500)
return
}
err = os.RemoveAll("data/_page-index-old")
if err != nil {
log.Printf("error while removing old index: %v", err)
http.Error(w, err.Error(), 500)
return
}
sw.Lap("indexing")
enc := json.NewEncoder(w)
enc.SetIndent("", " ")
err = enc.Encode(struct {
Ok bool `json:"ok"`
}{Ok: true})
if err != nil {
http.Error(w, err.Error(), 500)
return
}
}
return
}
q := bleve.NewQueryStringQuery(r.URL.Query().Get("q"))
sr := bleve.NewSearchRequest(q)
sr.IncludeLocations = false
sr.Size = 25
sr.Fields = []string{"page", "title", "text", "date"}
sr.Highlight = bleve.NewHighlightWithStyle("html")
sr.Highlight.AddField("text")
results, err := s.searchIndex.Search(sr)
if err != nil {
http.Error(w, err.Error(), 500)
return
}
enc := json.NewEncoder(w)
enc.SetIndent("", " ")
err = enc.Encode(&results)
if err != nil {
http.Error(w, err.Error(), 500)
}
}
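// pageBlock is one block of a page as stored in the search index, together
// with the links, tags, and dates extracted from its text.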
type pageBlock struct {
ID string `json:"id"`
Title string `json:"title"`
Page string `json:"page"`
Text string `json:"text"`
Link []string `json:"link"`
Tag []string `json:"tag"`
Date []time.Time `json:"date"`
}
func (p pageBlock) Type() string {
return "block"
}
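// createSearchObjects loads the page containing rootBlockID and converts
// every block on that page into a pageBlock ready for indexing.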
func createSearchObjects(rootBlockID string) ([]pageBlock, error) {
log.Println("createSearchObjects", rootBlockID)
blocks, err := loadBlocks("data", rootBlockID)
if err != nil {
return nil, err
}
if len(blocks.Parents) > 0 {
page := blocks.Parents[len(blocks.Parents)-1]
if page != rootBlockID {
blocks, err = loadBlocks("data", page)
if err != nil {
return nil, err
}
}
}
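// Walk the page's blocks breadth-first from the page root, converting
// each block into an indexable pageBlock.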
var pageBlocks []pageBlock
queue := []string{blocks.PageID}
for len(queue) > 0 {
current := queue[0]
queue = queue[1:]
links, err := ParseLinks(current, blocks.Texts[current])
if err != nil {
log.Println("ParseLinks", err)
links = nil
}
var linkNames []string
for _, link := range links {
linkNames = append(linkNames, link.Name)
}
tags, err := ParseTags(blocks.Texts[current])
if err != nil {
log.Println("ParseTags", err)
tags = nil
}
dates, err := ParseDates(blocks.Texts[current])
if err != nil {
log.Println("ParseDates", err)
dates = nil
}
pageDate, err := ParseDatePageName(blocks.Texts[blocks.PageID])
if err == nil {
dates = append(dates, pageDate)
}
pageBlocks = append(pageBlocks, pageBlock{
ID: current,
Title: blocks.Texts[blocks.PageID],
Page: blocks.PageID,
Text: blocks.Texts[current],
Link: linkNames,
Tag: tags,
Date: dates,
})
queue = append(queue, blocks.Children[current]...)
}
if rootBlockID == "Henk_Stuifzand" {
spew.Dump(pageBlocks)
}
return pageBlocks, nil
}
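// createStructuredFormat converts a page into a searchObject. List items
// of the form "key:: value" become metadata; indentation nests values
// under their parent key via a stack of parent levels.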
func createStructuredFormat(page Page) (searchObject, error) {
so := searchObject{}
so.Title = page.Title
so.Meta = make(map[string]interface{})
type simpleListItem struct {
Text string
ID string
Indented int
}
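// parent is one level of the metadata stack: a deeper-indented item
// pushes a new level, and a dedent merges the level back into the one
// above it.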
type parent struct {
key string
indent int
items []interface{}
values map[string]interface{}
}
var parents []parent
parents = append(parents, parent{
values: make(map[string]interface{}),
})
var listItems []simpleListItem
if err := json.NewDecoder(strings.NewReader(page.Content)).Decode(&listItems); err != nil {
so.Blocks = append(so.Blocks, page.Content)
} else {
for _, li := range listItems {
meta := strings.SplitN(li.Text, "::", 2)
par := parents[len(parents)-1]
// merge up
for len(parents) > 1 && li.Indented <= par.indent {
parents = parents[:len(parents)-1]
nextTop := parents[len(parents)-1]
if len(par.values) > 0 {
if vals, e := nextTop.values[par.key]; e {
if vals2, ok := vals.(map[string]interface{}); ok {
for k, v := range par.values {
vals2[k] = v
}
nextTop.values[par.key] = vals2
}
} else {
nextTop.values[par.key] = par.values
}
} else if len(par.items) > 0 {
nextTop.values[par.key] = par.items
} else {
nextTop.values[par.key] = ""
}
parents[len(parents)-1] = nextTop
par = parents[len(parents)-1]
}
if len(meta) == 2 {
key := strcase.ToSnake(strings.TrimSpace(meta[0]))
value := strings.TrimSpace(meta[1])
if value == "" {
parents = append(parents, parent{
key: key,
indent: li.Indented,
values: make(map[string]interface{}),
})
} else {
if len(parents) > 0 {
par = parents[len(parents)-1]
// save new value
if li.Indented > par.indent {
links, err := ParseLinks(li.ID, value)
if err != nil {
par.values[key] = value
} else {
if len(links) > 0 {
links[0].Href = fmt.Sprintf("%s%s", *baseurl, links[0].PageName)
links[0].ID = ""
par.values[key] = links[0]
} else {
par.values[key] = value
}
}
}
parents[len(parents)-1] = par
}
}
} else {
links, err := ParseLinks(li.ID, li.Text)
if err != nil {
par.items = append(par.items, li.Text)
} else if len(links) > 0 {
links[0].Href = fmt.Sprintf("%s%s", *baseurl, links[0].PageName)
links[0].ID = ""
par.items = append(par.items, links[0])
} else {
par.items = append(par.items, li.Text)
}
parents[len(parents)-1] = par
2020-07-01 14:40:10 +00:00
}
so.Blocks = append(so.Blocks, li.Text)
links, err := ParseLinks(li.ID, li.Text)
if err != nil {
continue
}
for i, link := range links {
links[i].Href = fmt.Sprintf("%s%s", *baseurl, link.PageName)
}
so.Links = append(so.Links, links...)
dates, err := ParseDates(li.Text)
if err != nil {
dates = nil
}
so.Dates = append(so.Dates, dates...)
}
}
date, err := ParseDatePageName(so.Title)
if err == nil {
so.Dates = append(so.Dates, date)
}
// merge up
for len(parents) > 1 {
par := parents[len(parents)-1]
parents = parents[:len(parents)-1]
nextTop := parents[len(parents)-1]
if len(par.values) > 0 {
nextTop.values[par.key] = par.values
} else if len(par.items) > 0 {
nextTop.values[par.key] = par.items
} else {
nextTop.values[par.key] = ""
}
parents[len(parents)-1] = nextTop
}
so.Meta = parents[0].values
for _, refs := range page.Refs {
for _, ref := range refs {
so.Refs = append(so.Refs, nameLine{
ref.Name,
ref.Title,
strings.TrimSpace(ref.Line),
})
}
}
return so, nil
}