From 258dd4f7abd4a2aef381f541c6f975bac3dcf514 Mon Sep 17 00:00:00 2001 From: Peter Stuifzand Date: Sat, 8 Jan 2022 21:58:41 +0100 Subject: [PATCH] Problem: search does not work with dates and tags Solution: add search for tags and dates --- editor/src/index.js | 2 +- editor/src/search.js | 8 ++- editor/src/styles.scss | 7 +++ go.mod | 1 + link/parser.go | 29 ++++++++++- link/parser_test.go | 22 ++++++-- main.go | 15 ++++-- search.go | 113 +++++++++++++++++++++++++---------------- util.go | 61 ++++++++++++++++++++-- 9 files changed, 201 insertions(+), 57 deletions(-) diff --git a/editor/src/index.js b/editor/src/index.js index afaa9c2..99eb9c0 100644 --- a/editor/src/index.js +++ b/editor/src/index.js @@ -34,7 +34,7 @@ $(document).on('keydown', '.keyboard-list', function (event) { $(document).on('keydown', '#search-input', function (event) { let $ac = $('#autocomplete:visible'); if (event.key === 'Escape') { - $(this).val(''); + $(this).val('').removeClass('is-error'); if ($ac.length) { $ac.fadeOut(); diff --git a/editor/src/search.js b/editor/src/search.js index fe7d57d..2c37259 100644 --- a/editor/src/search.js +++ b/editor/src/search.js @@ -8,10 +8,16 @@ function search(element) { element: element, search(query) { element.classList.add('is-loading') + let result; return startQuery(query) .then(res => { - element.classList.remove('is-loading') + element.classList.remove('is-loading', 'is-error') + result = res return res + }).catch(e => { + console.log(e) + element.classList.add('is-error') + return result || [] }) } } diff --git a/editor/src/styles.scss b/editor/src/styles.scss index 68222be..b36d7f7 100644 --- a/editor/src/styles.scss +++ b/editor/src/styles.scss @@ -607,3 +607,10 @@ input.input-line, input.input-line:active { .tab-page.tab-active { display: block; } +.search.input { + border: none; + padding: 2px; +} +.search.input.is-error { + outline: red solid 4px; +} diff --git a/go.mod b/go.mod index 078c740..27490e0 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,7 @@ require ( github.com/cznic/b v0.0.0-20181122101859-a26611c4d92d // indirect github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548 // indirect github.com/cznic/strutil v0.0.0-20181122101858-275e90344537 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect github.com/facebookgo/ensure v0.0.0-20200202191622-63f1cf65ac4c // indirect github.com/facebookgo/stack v0.0.0-20160209184415-751773369052 // indirect github.com/facebookgo/subset v0.0.0-20200203212716-c811ad88dec4 // indirect diff --git a/link/parser.go b/link/parser.go index 36617e6..f679386 100644 --- a/link/parser.go +++ b/link/parser.go @@ -5,6 +5,7 @@ import ( "fmt" "html/template" "io" + "log" "strings" ) @@ -76,6 +77,24 @@ func formatTitle(w io.Writer, input string, root Tree, indent int) { } } +func findAllLinks(input string, root Tree) []string { + var links []string + typ := root.cur.typ + if typ == "link" { + links = append(links, root.children[1].text(input)) + } + for _, c := range root.children { + links = append(links, findAllLinks(input, c)...) + } + return links +} + +func FindAllLinks(input string) []string { + var p Parser + root := p.Parse(input) + return findAllLinks(input, root) +} + func FormatHtmlTitle(input string) template.HTML { p := Parser{} root := p.Parse(input) @@ -87,13 +106,16 @@ func FormatHtmlTitle(input string) template.HTML { func (p *Parser) Parse(input string) Tree { p.stack = append(p.stack, Tree{}) + limit := 1000 + i := 0 p.pushMarker(i) - for i < len(input) { + for i < len(input) && limit > 0 { p.pushMarker(i) for i < len(input) && (input[i] != '[' && input[i] != ']') { i++ + limit-- } p.popMarker(i, "text") if i+2 <= len(input) && input[i:i+2] == "[[" { @@ -109,8 +131,13 @@ func (p *Parser) Parse(input string) Tree { p.popMarker(i, "end link tag") p.popMarker(i, "link") } + limit-- } p.popMarker(i, "full text") + if limit == 0 { + log.Println("LIMIT REACHED: ", input) + } + return p.output() } diff --git a/link/parser_test.go b/link/parser_test.go index 961f44d..5bd8156 100644 --- a/link/parser_test.go +++ b/link/parser_test.go @@ -25,15 +25,31 @@ import ( ) func TestFormatHtmlTitle(t *testing.T) { - tests := []struct { input, output string }{ - {input: "hello", output: "hello"}, - {input: "hello [[world]]", output: `hello [[world]]`}, + {input: "hello", output: "hello"}, + {input: "hello [[world]]", output: `hello [[world]]`}, + {input: "hello [[world]] end", output: `hello [[world]] end`}, + {input: "hello [[world [[current stuff]] here]] end", output: `hello [[world [[current stuff]] here]] end`}, } for _, test := range tests { s := FormatHtmlTitle(test.input) assert.Equal(t, test.output, string(s)) } } +func TestFindAllLinks(t *testing.T) { + tests := []struct { + input string + output []string + }{ + {input: "hello", output: nil}, + {input: "hello [[world]]", output: []string{"world"}}, + {input: "hello [[world]] end", output: []string{"world"}}, + {input: "hello [[world [[current stuff]] here]] end", output: []string{"world [[current stuff]] here", "current stuff"}}, + } + for _, test := range tests { + links := FindAllLinks(test.input) + assert.Equal(t, test.output, links) + } +} diff --git a/main.go b/main.go index eaf438a..61230d8 100644 --- a/main.go +++ b/main.go @@ -1261,9 +1261,9 @@ func createSearchIndex(dataDir, indexName string) (bleve.Index, error) { indexMapping := bleve.NewIndexMapping() documentMapping := bleve.NewDocumentMapping() - nameFieldMapping := bleve.NewTextFieldMapping() - nameFieldMapping.Store = true - documentMapping.AddFieldMappingsAt("name", nameFieldMapping) + pageFieldMapping := bleve.NewTextFieldMapping() + pageFieldMapping.Store = true + documentMapping.AddFieldMappingsAt("page", pageFieldMapping) titleFieldMapping := bleve.NewTextFieldMapping() titleFieldMapping.Store = true @@ -1273,6 +1273,15 @@ func createSearchIndex(dataDir, indexName string) (bleve.Index, error) { linkFieldMapping.Store = true documentMapping.AddFieldMappingsAt("link", linkFieldMapping) + textFieldMapping := bleve.NewTextFieldMapping() + textFieldMapping.Store = true + documentMapping.AddFieldMappingsAt("text", textFieldMapping) + + dateFieldMapping := bleve.NewDateTimeFieldMapping() + dateFieldMapping.Store = false + dateFieldMapping.Index = true + documentMapping.AddFieldMappingsAt("date", dateFieldMapping) + indexMapping.AddDocumentMapping("block", documentMapping) searchIndex, err := bleve.New(indexDir, indexMapping) diff --git a/search.go b/search.go index b208308..c1cfe53 100644 --- a/search.go +++ b/search.go @@ -25,9 +25,11 @@ import ( "net/http" "os" "strings" + "time" "github.com/blevesearch/bleve/v2" "github.com/blevesearch/bleve/v2/mapping" + "github.com/davecgh/go-spew/spew" "github.com/iancoleman/strcase" ) @@ -53,6 +55,7 @@ type searchObject struct { Refs []nameLine `json:"refs"` Meta map[string]interface{} `json:"meta"` Links []ParsedLink `json:"links"` + Dates []time.Time `json:"dates"` } func NewSearchHandler(searchIndex bleve.Index) (http.Handler, error) { @@ -104,6 +107,9 @@ func (s *searchHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { return } if r.PostForm.Get("reset") == "1" { + var sw stopwatch + sw.Start("full reset") + defer sw.Stop() refs := make(Refs) mp := NewFilePages("data", nil) @@ -122,6 +128,8 @@ func (s *searchHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { } } + sw.Lap("save blocks from pages") + // Reload all pages pages, err = mp.AllPages() if err != nil { @@ -138,21 +146,23 @@ func (s *searchHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { } } - log.Println("saveLinks") + sw.Lap("process backrefs for pages") + err = saveLinks(mp) if err != nil { log.Printf("error while saving links %v", err) http.Error(w, err.Error(), 500) return } + sw.Lap("save links") - log.Println("saveBackrefs") err = saveBackrefs("data/backrefs.json", refs) if err != nil { log.Printf("error while saving backrefs %v", err) http.Error(w, err.Error(), 500) return } + sw.Lap("save backrefs") err = os.RemoveAll("data/_tmp_index") if err != nil { @@ -161,28 +171,12 @@ func (s *searchHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { return } - index, err := createSearchIndex("data", "_tmp_index") + _, err = createSearchIndex("data", "_tmp_index") if err != nil { http.Error(w, err.Error(), 500) return } - for _, page := range pages { - searchObjects, err := createSearchObjects(page.Name) - if err != nil { - log.Printf("error while creating search object %s: %v", page.Title, err) - continue - } - - for _, so := range searchObjects { - err = index.Index(so.ID, so) - if err != nil { - log.Printf("error while indexing %s: %v", page.Title, err) - continue - } - } - } - err = os.Rename("data/_page-index", "data/_page-index-old") if err != nil { log.Printf("error while resetting index: %v", err) @@ -201,6 +195,7 @@ func (s *searchHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { http.Error(w, err.Error(), 500) return } + sw.Lap("indexing") enc := json.NewEncoder(w) enc.SetIndent("", " ") @@ -219,7 +214,7 @@ func (s *searchHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { sr := bleve.NewSearchRequest(q) sr.IncludeLocations = false sr.Size = 25 - sr.Fields = []string{"page", "title", "text"} + sr.Fields = []string{"page", "title", "text", "date"} sr.Highlight = bleve.NewHighlightWithStyle("html") sr.Highlight.AddField("text") results, err := s.searchIndex.Search(sr) @@ -235,11 +230,13 @@ func (s *searchHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { } type pageBlock struct { - ID string `json:"id"` - Title string `json:"title"` - Page string `json:"page"` - Text string `json:"text"` - Link string `json:"link"` + ID string `json:"id"` + Title string `json:"title"` + Page string `json:"page"` + Text string `json:"text"` + Link []string `json:"link"` + Tag []string `json:"tag"` + Date []time.Time `json:"date"` } func (p pageBlock) Type() string { @@ -247,6 +244,7 @@ func (p pageBlock) Type() string { } func createSearchObjects(rootBlockID string) ([]pageBlock, error) { + log.Println("createSearchObjects", rootBlockID) blocks, err := loadBlocks("data", rootBlockID) if err != nil { return nil, err @@ -262,32 +260,49 @@ func createSearchObjects(rootBlockID string) ([]pageBlock, error) { links, err := ParseLinks(current, blocks.Texts[current]) if err != nil { - continue + log.Println("ParseLinks", err) + links = nil } - if len(links) == 0 { - pageBlocks = append(pageBlocks, pageBlock{ - ID: current, - Title: blocks.Texts[blocks.PageID], - Page: blocks.PageID, - Text: blocks.Texts[current], - Link: "", - }) - } else { - for _, link := range links { - pageBlocks = append(pageBlocks, pageBlock{ - ID: current, - Title: blocks.Texts[blocks.PageID], - Page: blocks.PageID, - Text: blocks.Texts[current], - Link: link.Name, - }) - } + var linkNames []string + for _, link := range links { + linkNames = append(linkNames, link.Name) } + tags, err := ParseTags(blocks.Texts[current]) + if err != nil { + log.Println("ParseTags", err) + tags = nil + } + + dates, err := ParseDates(blocks.Texts[current]) + if err != nil { + log.Println("ParseDates", err) + dates = nil + } + + pageDate, err := ParseDatePageName(blocks.Texts[blocks.PageID]) + if err == nil { + dates = append(dates, pageDate) + } + + pageBlocks = append(pageBlocks, pageBlock{ + ID: current, + Title: blocks.Texts[blocks.PageID], + Page: blocks.PageID, + Text: blocks.Texts[current], + Link: linkNames, + Tag: tags, + Date: dates, + }) + queue = append(queue, blocks.Children[current]...) } + if rootBlockID == "Henk_Stuifzand" { + spew.Dump(pageBlocks) + } + return pageBlocks, nil } @@ -400,9 +415,19 @@ func createStructuredFormat(page Page) (searchObject, error) { } so.Links = append(so.Links, links...) + + dates, err := ParseDates(li.Text) + if err != nil { + dates = nil + } + so.Dates = append(so.Dates, dates...) } } + date, err := ParseDatePageName(so.Title) + if err == nil { + so.Dates = append(so.Dates, date) + } // merge up for len(parents) > 1 { par := parents[len(parents)-1] diff --git a/util.go b/util.go index 403d0d6..93f0092 100644 --- a/util.go +++ b/util.go @@ -29,6 +29,8 @@ import ( "strconv" "strings" "time" + + "p83.nl/go/wiki/link" ) var ( @@ -70,6 +72,25 @@ func RandStringBytes(n int) string { return string(b) } +type DateLink struct { + Link string + Date time.Time +} + +func ParseDates(content string) ([]time.Time, error) { + links := link.FindAllLinks(content) + var result []time.Time + for _, linkName := range links { + date, err := ParseDatePageName(linkName) + if err != nil { + continue + } + result = append(result, date) + } + + return result, nil +} + func ParseLinks(blockId string, content string) ([]ParsedLink, error) { hrefRE := regexp.MustCompile(`(#?\[\[\s*([^\]]+)\s*\]\])`) // keywordsRE := regexp.MustCompile(`(\w+)::`) @@ -110,6 +131,29 @@ func ParseLinks(blockId string, content string) ([]ParsedLink, error) { return result, nil } +func ParseTags(content string) ([]string, error) { + hrefRE := regexp.MustCompile(`(#\[\[\s*([^\]]+)\s*\]\])`) + + scanner := bufio.NewScanner(strings.NewReader(content)) + scanner.Split(bufio.ScanLines) + + var result []string + + for scanner.Scan() { + line := scanner.Text() + links := hrefRE.FindAllStringSubmatch(line, -1) + for _, matches := range links { + link := matches[0] + link = strings.TrimPrefix(link, "#[[") + link = strings.TrimSuffix(link, "]]") + link = strings.TrimSpace(link) + result = append(result, link) + } + } + + return result, nil +} + func cleanNameURL(name string) string { return strings.Replace(name, " ", "_", -1) } @@ -119,15 +163,24 @@ func cleanTitle(name string) string { } type stopwatch struct { - start time.Time - label string + start time.Time + lastLap time.Time + label string } func (sw *stopwatch) Start(label string) { sw.start = time.Now() + sw.lastLap = time.Now() sw.label = label } +func (sw *stopwatch) Lap(label string) { + now := time.Now() + d := now.Sub(sw.lastLap) + log.Printf("%-20s: %s\n", label, d.String()) + sw.lastLap = now +} + func (sw *stopwatch) Stop() { endTime := time.Now() d := endTime.Sub(sw.start) @@ -176,7 +229,7 @@ func parseMonth(month string) (time.Month, error) { } func ParseDatePageName(name string) (time.Time, error) { - if matches := niceDateParseRE.FindStringSubmatch(name); matches != nil { + if matches := niceDateParseRE.FindStringSubmatch(strings.Replace(name, " ", "_", -1)); matches != nil { day, err := strconv.Atoi(matches[1]) if err != nil { return time.Time{}, fmt.Errorf("%q: %s: %w", name, err, ParseFailed) @@ -189,7 +242,7 @@ func ParseDatePageName(name string) (time.Time, error) { if err != nil { return time.Time{}, fmt.Errorf("%q: %s: %w", name, err, ParseFailed) } - return time.Date(year, month, day, 0, 0, 0, 0, time.Local), nil + return time.Date(year, month, day, 0, 0, 0, 0, time.UTC), nil } return time.Time{}, fmt.Errorf("%q: invalid syntax: %w", name, ParseFailed) }