2021-08-07 17:13:10 +00:00
|
|
|
/*
|
|
|
|
* Wiki - A wiki with editor
|
|
|
|
* Copyright (c) 2021 Peter Stuifzand
|
|
|
|
*
|
|
|
|
* This program is free software: you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
|
|
|
|
2020-06-30 20:56:12 +00:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"encoding/json"
|
2020-07-04 13:07:32 +00:00
|
|
|
"fmt"
|
|
|
|
"log"
|
2020-06-30 20:56:12 +00:00
|
|
|
"net/http"
|
2020-07-12 16:01:32 +00:00
|
|
|
"os"
|
2020-07-01 14:40:10 +00:00
|
|
|
"strings"
|
2020-06-30 20:56:12 +00:00
|
|
|
|
|
|
|
"github.com/blevesearch/bleve"
|
|
|
|
"github.com/blevesearch/bleve/mapping"
|
2020-10-25 14:37:31 +00:00
|
|
|
"github.com/iancoleman/strcase"
|
2020-06-30 20:56:12 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// TODO: http handler
|
|
|
|
// TODO: index all pages on start
|
|
|
|
// TODO: reindex all command
|
|
|
|
// TODO: search(query) command
|
|
|
|
|
|
|
|
// searchHandler is the http.Handler that serves search queries and index
// maintenance requests (see ServeHTTP).
type searchHandler struct {
	// indexMapping is not set by NewSearchHandler.
	// NOTE(review): no use of this field is visible in this part of the file — confirm whether it is still needed.
	indexMapping mapping.IndexMapping
	// searchIndex is the bleve index that queries and field listings run against.
	searchIndex bleve.Index
}
|
|
|
|
|
2020-07-01 14:40:10 +00:00
|
|
|
// nameLine is one reference entry stored in searchObject.Refs.
//
// createStructuredFormat builds it with an unkeyed composite literal, so the
// field order (Name, Title, Line) must not change.
type nameLine struct {
	Name  string `json:"name"`
	Title string `json:"title"`
	Line  string `json:"line"`
}
|
|
|
|
|
|
|
|
// searchObject is the structured representation of a page produced by
// createStructuredFormat.
type searchObject struct {
	// Title is copied from the page title.
	Title string `json:"title"`
	// Blocks holds the text of every list item of the page, or the raw page
	// content when the content is not a JSON list.
	Blocks []string `json:"blocks"`
	// Refs holds the flattened entries of the page's Refs.
	Refs []nameLine `json:"refs"`
	// Meta holds the nested values parsed from "key:: value" list items.
	Meta map[string]interface{} `json:"meta"`
	// Links holds all links parsed from the list items, with Href filled in.
	Links []ParsedLink `json:"links"`
}
|
|
|
|
|
2020-06-30 20:56:12 +00:00
|
|
|
func NewSearchHandler(searchIndex bleve.Index) (http.Handler, error) {
|
|
|
|
return &searchHandler{
|
2020-07-01 14:40:10 +00:00
|
|
|
searchIndex: searchIndex,
|
2020-06-30 20:56:12 +00:00
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *searchHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
2020-07-04 13:07:32 +00:00
|
|
|
defer r.Body.Close()
|
|
|
|
|
|
|
|
sess, err := NewSession(w, r)
|
|
|
|
if err != nil {
|
|
|
|
http.Error(w, err.Error(), 500)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
defer func() {
|
|
|
|
if err := sess.Flush(); err != nil {
|
|
|
|
log.Println(err)
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
if !sess.LoggedIn {
|
|
|
|
fmt.Fprint(w, "{}")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2020-07-12 16:01:32 +00:00
|
|
|
if r.URL.Query().Get("fields") == "1" {
|
|
|
|
fields, err := s.searchIndex.Fields()
|
|
|
|
if err != nil {
|
|
|
|
http.Error(w, err.Error(), 500)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
enc := json.NewEncoder(w)
|
|
|
|
enc.SetIndent("", " ")
|
|
|
|
err = enc.Encode(&fields)
|
|
|
|
if err != nil {
|
|
|
|
http.Error(w, err.Error(), 500)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
return
|
|
|
|
} else if r.Method == "GET" && r.URL.Query().Get("reset") == "1" {
|
|
|
|
w.Header().Add("Content-Type", "text/html")
|
|
|
|
fmt.Fprint(w, `<form action="/search/" method="post"><input type=hidden name=reset value=1><input type=submit value="Reset Index"></form>`)
|
|
|
|
return
|
|
|
|
} else if r.Method == "POST" {
|
|
|
|
err = r.ParseForm()
|
|
|
|
if err != nil {
|
|
|
|
http.Error(w, err.Error(), 500)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if r.PostForm.Get("reset") == "1" {
|
|
|
|
|
|
|
|
refs := make(Refs)
|
|
|
|
mp := NewFilePages("data", nil)
|
|
|
|
|
|
|
|
pages, err := mp.AllPages()
|
|
|
|
if err != nil {
|
|
|
|
http.Error(w, err.Error(), 500)
|
|
|
|
return
|
|
|
|
}
|
2020-10-21 18:49:23 +00:00
|
|
|
|
|
|
|
for _, page := range pages {
|
2021-08-18 19:01:18 +00:00
|
|
|
err = mp.saveBlocksFromPage("data", page)
|
2020-10-21 18:49:23 +00:00
|
|
|
if err != nil {
|
2021-08-08 21:43:40 +00:00
|
|
|
log.Printf("error while processing blocks from page %s: %v", page.Name, err)
|
2020-10-21 18:49:23 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Reload all pages
|
|
|
|
pages, err = mp.AllPages()
|
|
|
|
if err != nil {
|
|
|
|
http.Error(w, err.Error(), 500)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2020-07-12 16:01:32 +00:00
|
|
|
for _, page := range pages {
|
2020-10-21 18:49:23 +00:00
|
|
|
log.Println("processing ", page.Title)
|
2020-07-12 16:01:32 +00:00
|
|
|
err = processBackrefsForPage(page, refs)
|
|
|
|
if err != nil {
|
|
|
|
log.Println("error while processing backrefs: ", err)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
}
|
2020-10-21 18:49:23 +00:00
|
|
|
|
|
|
|
log.Println("saveLinks")
|
|
|
|
err = saveLinks(mp)
|
|
|
|
if err != nil {
|
2021-08-08 21:43:40 +00:00
|
|
|
log.Printf("error while saving links %v", err)
|
2020-10-21 18:49:23 +00:00
|
|
|
http.Error(w, err.Error(), 500)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
log.Println("saveBackrefs")
|
2020-07-12 16:01:32 +00:00
|
|
|
err = saveBackrefs("data/backrefs.json", refs)
|
|
|
|
if err != nil {
|
2021-08-08 21:43:40 +00:00
|
|
|
log.Printf("error while saving backrefs %v", err)
|
2020-07-12 16:01:32 +00:00
|
|
|
http.Error(w, err.Error(), 500)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
err = os.RemoveAll("data/_tmp_index")
|
|
|
|
if err != nil {
|
2021-08-08 21:43:40 +00:00
|
|
|
log.Printf("error while remove old index %v", err)
|
2020-07-12 16:01:32 +00:00
|
|
|
http.Error(w, err.Error(), 500)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2020-10-21 18:49:23 +00:00
|
|
|
index, err := createSearchIndex("data", "_tmp_index")
|
2020-07-12 16:01:32 +00:00
|
|
|
if err != nil {
|
|
|
|
http.Error(w, err.Error(), 500)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, page := range pages {
|
2021-08-18 19:01:18 +00:00
|
|
|
searchObjects, err := createSearchObjects(mp, page.Name)
|
2020-07-12 16:01:32 +00:00
|
|
|
if err != nil {
|
2021-08-08 21:43:40 +00:00
|
|
|
log.Printf("error while creating search object %s: %v", page.Title, err)
|
2020-07-12 16:01:32 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2020-10-21 18:49:23 +00:00
|
|
|
for _, so := range searchObjects {
|
|
|
|
err = index.Index(so.ID, so)
|
|
|
|
if err != nil {
|
2021-08-08 21:43:40 +00:00
|
|
|
log.Printf("error while indexing %s: %v", page.Title, err)
|
2020-10-21 18:49:23 +00:00
|
|
|
continue
|
|
|
|
}
|
2020-07-12 16:01:32 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
err = os.Rename("data/_page-index", "data/_page-index-old")
|
|
|
|
if err != nil {
|
2021-08-08 21:43:40 +00:00
|
|
|
log.Printf("error while resetting index: %v", err)
|
2020-07-12 16:01:32 +00:00
|
|
|
http.Error(w, err.Error(), 500)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
err = os.Rename("data/_tmp_index", "data/_page-index")
|
|
|
|
if err != nil {
|
2021-08-08 21:43:40 +00:00
|
|
|
log.Printf("error while putthing new index in place: %v", err)
|
2020-07-12 16:01:32 +00:00
|
|
|
http.Error(w, err.Error(), 500)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
err = os.RemoveAll("data/_page-index-old")
|
|
|
|
if err != nil {
|
2021-08-08 21:43:40 +00:00
|
|
|
log.Printf("error while remove old index %v", err)
|
2020-07-12 16:01:32 +00:00
|
|
|
http.Error(w, err.Error(), 500)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
enc := json.NewEncoder(w)
|
|
|
|
enc.SetIndent("", " ")
|
|
|
|
err = enc.Encode(struct {
|
|
|
|
Ok bool `json:"ok"`
|
|
|
|
}{Ok: true})
|
|
|
|
if err != nil {
|
|
|
|
http.Error(w, err.Error(), 500)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2020-06-30 20:56:12 +00:00
|
|
|
q := bleve.NewQueryStringQuery(r.URL.Query().Get("q"))
|
|
|
|
sr := bleve.NewSearchRequest(q)
|
2020-10-30 23:29:41 +00:00
|
|
|
sr.IncludeLocations = false
|
|
|
|
sr.Size = 25
|
2020-10-21 18:49:23 +00:00
|
|
|
sr.Fields = []string{"page", "title", "text"}
|
|
|
|
sr.Highlight = bleve.NewHighlightWithStyle("html")
|
|
|
|
sr.Highlight.AddField("text")
|
2020-06-30 20:56:12 +00:00
|
|
|
results, err := s.searchIndex.Search(sr)
|
|
|
|
if err != nil {
|
|
|
|
http.Error(w, err.Error(), 500)
|
|
|
|
}
|
|
|
|
enc := json.NewEncoder(w)
|
|
|
|
enc.SetIndent("", " ")
|
|
|
|
err = enc.Encode(&results)
|
|
|
|
if err != nil {
|
|
|
|
http.Error(w, err.Error(), 500)
|
|
|
|
}
|
|
|
|
}
|
2020-07-01 14:40:10 +00:00
|
|
|
|
2020-10-21 18:49:23 +00:00
|
|
|
// pageBlock is the document stored in the search index for a single block
// of a page. createSearchObjects emits one pageBlock per link found in the
// block, or a single one with an empty Link when the block has no links.
type pageBlock struct {
	// ID is the ID of the block; it is also used as the index document ID.
	ID string `json:"id"`
	// Title is the text of the page's root block.
	Title string `json:"title"`
	// Page is the ID of the page the block belongs to.
	Page string `json:"page"`
	// Text is the text of the block itself.
	Text string `json:"text"`
	// Link is the name of a link found in the block, or "" if there is none.
	Link string `json:"link"`
}

// Type reports the document type of a pageBlock, which is always "block".
// NOTE(review): presumably consumed by the bleve index mapping to select the
// document mapping — confirm against createSearchIndex.
func (pageBlock) Type() string {
	const documentType = "block"
	return documentType
}
|
|
|
|
|
2021-08-18 19:01:18 +00:00
|
|
|
func createSearchObjects(fp *FilePages, rootBlockID string) ([]pageBlock, error) {
|
|
|
|
blocks, err := fp.blockRepo.GetBlocks(rootBlockID)
|
2020-10-21 18:49:23 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
var pageBlocks []pageBlock
|
|
|
|
|
|
|
|
queue := []string{blocks.PageID}
|
|
|
|
|
|
|
|
for len(queue) > 0 {
|
|
|
|
current := queue[0]
|
|
|
|
queue = queue[1:]
|
|
|
|
|
2020-10-30 23:29:41 +00:00
|
|
|
links, err := ParseLinks(current, blocks.Texts[current])
|
|
|
|
if err != nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(links) == 0 {
|
|
|
|
pageBlocks = append(pageBlocks, pageBlock{
|
|
|
|
ID: current,
|
|
|
|
Title: blocks.Texts[blocks.PageID],
|
|
|
|
Page: blocks.PageID,
|
|
|
|
Text: blocks.Texts[current],
|
|
|
|
Link: "",
|
|
|
|
})
|
|
|
|
} else {
|
|
|
|
for _, link := range links {
|
|
|
|
pageBlocks = append(pageBlocks, pageBlock{
|
|
|
|
ID: current,
|
|
|
|
Title: blocks.Texts[blocks.PageID],
|
|
|
|
Page: blocks.PageID,
|
|
|
|
Text: blocks.Texts[current],
|
|
|
|
Link: link.Name,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
2020-10-21 18:49:23 +00:00
|
|
|
|
|
|
|
queue = append(queue, blocks.Children[current]...)
|
|
|
|
}
|
|
|
|
|
|
|
|
return pageBlocks, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// createStructuredFormat converts a page into a searchObject.
//
// The page content is decoded as a JSON list of items (Text, ID, Indented).
// If decoding fails, the raw content becomes the only entry in Blocks.
// Otherwise every item text is appended to Blocks, every link parsed from an
// item is appended to Links (with Href set to *baseurl + PageName), and items
// of the form "key:: value" are collected into the nested Meta map, using the
// item indentation to decide the nesting. Finally the page's Refs are
// flattened into so.Refs.
//
// The returned error is always nil.
func createStructuredFormat(page Page) (searchObject, error) {
	so := searchObject{}
	so.Title = page.Title
	so.Meta = make(map[string]interface{})

	// simpleListItem is the subset of a list item that is decoded from the
	// page content.
	type simpleListItem struct {
		Text     string
		ID       string
		Indented int
	}

	// parent is one level of the metadata nesting stack: a key opened by a
	// "key::" item without a value, the indentation of that item, and the
	// plain items / keyed values collected beneath it so far.
	type parent struct {
		key    string
		indent int
		items  []interface{}
		values map[string]interface{}
	}

	// The stack always holds a root entry at index 0; its values end up in
	// so.Meta.
	var parents []parent
	parents = append(parents, parent{
		values: make(map[string]interface{}),
	})

	var listItems []simpleListItem
	if err := json.NewDecoder(strings.NewReader(page.Content)).Decode(&listItems); err != nil {
		// Not a JSON list: keep the raw content as a single block.
		so.Blocks = append(so.Blocks, page.Content)
	} else {
		for _, li := range listItems {
			meta := strings.SplitN(li.Text, "::", 2)
			// par is a copy of the top stack entry; changes to its items
			// slice must be written back to parents explicitly.
			par := parents[len(parents)-1]
			// Merge up: pop every open parent that is not shallower than the
			// current item and store its result under its key in the entry
			// below it (the root entry is never popped).
			for len(parents) > 1 && li.Indented <= par.indent {
				parents = parents[:len(parents)-1]
				nextTop := parents[len(parents)-1]
				if len(par.values) > 0 {
					if vals, e := nextTop.values[par.key]; e {
						// The key already exists: merge into it when it is a
						// map. If it is not a map, par.values is dropped.
						if vals2, ok := vals.(map[string]interface{}); ok {
							for k, v := range par.values {
								vals2[k] = v
							}
							nextTop.values[par.key] = vals2
						}
					} else {
						nextTop.values[par.key] = par.values
					}
				} else if len(par.items) > 0 {
					nextTop.values[par.key] = par.items
				} else {
					// A key with nothing beneath it is stored as an empty string.
					nextTop.values[par.key] = ""
				}
				parents[len(parents)-1] = nextTop
				par = parents[len(parents)-1]
			}
			if len(meta) == 2 {
				key := strcase.ToSnake(strings.TrimSpace(meta[0]))
				value := strings.TrimSpace(meta[1])
				if value == "" {
					// "key::" without a value opens a new nesting level.
					parents = append(parents, parent{
						key:    key,
						indent: li.Indented,
						values: make(map[string]interface{}),
					})
				} else {
					if len(parents) > 0 {
						par = parents[len(parents)-1]
						// Save the new value, but only when the item is
						// indented deeper than the current parent.
						if li.Indented > par.indent {
							links, err := ParseLinks(li.ID, value)
							if err != nil {
								par.values[key] = value
							} else {
								if len(links) > 0 {
									// When the value contains links, the first
									// link is stored instead of the text.
									links[0].Href = fmt.Sprintf("%s%s", *baseurl, links[0].PageName)
									links[0].ID = ""
									par.values[key] = links[0]
								} else {
									par.values[key] = value
								}
							}
						}
						parents[len(parents)-1] = par
					}
				}
			} else {
				// A plain item (no "::") is appended to the items of the
				// current parent: as its first link when it contains links,
				// otherwise as its text.
				links, err := ParseLinks(li.ID, li.Text)
				if err != nil {
					par.items = append(par.items, li.Text)
				} else if len(links) > 0 {
					links[0].Href = fmt.Sprintf("%s%s", *baseurl, links[0].PageName)
					links[0].ID = ""
					par.items = append(par.items, links[0])
				} else {
					par.items = append(par.items, li.Text)
				}
				parents[len(parents)-1] = par
			}

			so.Blocks = append(so.Blocks, li.Text)

			// Collect all links of the item; items whose links cannot be
			// parsed contribute none.
			links, err := ParseLinks(li.ID, li.Text)
			if err != nil {
				continue
			}

			for i, link := range links {
				links[i].Href = fmt.Sprintf("%s%s", *baseurl, link.PageName)
			}

			so.Links = append(so.Links, links...)
		}
	}

	// Merge up the parents that are still open. Unlike the merge inside the
	// loop, an existing value under the same key is overwritten here rather
	// than merged.
	for len(parents) > 1 {
		par := parents[len(parents)-1]
		parents = parents[:len(parents)-1]
		nextTop := parents[len(parents)-1]
		if len(par.values) > 0 {
			nextTop.values[par.key] = par.values
		} else if len(par.items) > 0 {
			nextTop.values[par.key] = par.items
		} else {
			nextTop.values[par.key] = ""
		}
		parents[len(parents)-1] = nextTop
	}

	so.Meta = parents[0].values

	for _, refs := range page.Refs {
		for _, ref := range refs {
			so.Refs = append(so.Refs, nameLine{
				ref.Name,
				ref.Title,
				strings.TrimSpace(ref.Line),
			})
		}
	}

	return so, nil
}
|