Fix synchronization bug in repo indexer (#3455)
This commit is contained in:
parent
17655cdf1b
commit
b16c84de7b
|
@ -5,9 +5,7 @@
|
||||||
package models
|
package models
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"io/ioutil"
|
"fmt"
|
||||||
"os"
|
|
||||||
"path"
|
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
@ -16,8 +14,6 @@ import (
|
||||||
"code.gitea.io/gitea/modules/indexer"
|
"code.gitea.io/gitea/modules/indexer"
|
||||||
"code.gitea.io/gitea/modules/log"
|
"code.gitea.io/gitea/modules/log"
|
||||||
"code.gitea.io/gitea/modules/setting"
|
"code.gitea.io/gitea/modules/setting"
|
||||||
|
|
||||||
"github.com/Unknwon/com"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// RepoIndexerStatus status of a repo's entry in the repo indexer
|
// RepoIndexerStatus status of a repo's entry in the repo indexer
|
||||||
|
@ -132,7 +128,11 @@ func populateRepoIndexer(maxRepoID int64) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func updateRepoIndexer(repo *Repository) error {
|
func updateRepoIndexer(repo *Repository) error {
|
||||||
changes, err := getRepoChanges(repo)
|
sha, err := getDefaultBranchSha(repo)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
changes, err := getRepoChanges(repo, sha)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
} else if changes == nil {
|
} else if changes == nil {
|
||||||
|
@ -140,12 +140,12 @@ func updateRepoIndexer(repo *Repository) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
batch := indexer.RepoIndexerBatch()
|
batch := indexer.RepoIndexerBatch()
|
||||||
for _, filename := range changes.UpdatedFiles {
|
for _, update := range changes.Updates {
|
||||||
if err := addUpdate(filename, repo, batch); err != nil {
|
if err := addUpdate(update, repo, batch); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for _, filename := range changes.RemovedFiles {
|
for _, filename := range changes.RemovedFilenames {
|
||||||
if err := addDelete(filename, repo, batch); err != nil {
|
if err := addDelete(filename, repo, batch); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -153,56 +153,61 @@ func updateRepoIndexer(repo *Repository) error {
|
||||||
if err = batch.Flush(); err != nil {
|
if err = batch.Flush(); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
return updateLastIndexSync(repo)
|
return repo.updateIndexerStatus(sha)
|
||||||
}
|
}
|
||||||
|
|
||||||
// repoChanges holds the changes (file additions/updates/removals) to a repo
|
// repoChanges holds the changes (file additions/updates/removals) to a repo
|
||||||
type repoChanges struct {
|
type repoChanges struct {
|
||||||
UpdatedFiles []string
|
Updates []fileUpdate
|
||||||
RemovedFiles []string
|
RemovedFilenames []string
|
||||||
|
}
|
||||||
|
|
||||||
|
type fileUpdate struct {
|
||||||
|
Filename string
|
||||||
|
BlobSha string
|
||||||
|
}
|
||||||
|
|
||||||
|
func getDefaultBranchSha(repo *Repository) (string, error) {
|
||||||
|
stdout, err := git.NewCommand("show-ref", "-s", repo.DefaultBranch).RunInDir(repo.RepoPath())
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
return strings.TrimSpace(stdout), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// getRepoChanges returns changes to repo since last indexer update
|
// getRepoChanges returns changes to repo since last indexer update
|
||||||
func getRepoChanges(repo *Repository) (*repoChanges, error) {
|
func getRepoChanges(repo *Repository, revision string) (*repoChanges, error) {
|
||||||
repoWorkingPool.CheckIn(com.ToStr(repo.ID))
|
if err := repo.getIndexerStatus(); err != nil {
|
||||||
defer repoWorkingPool.CheckOut(com.ToStr(repo.ID))
|
|
||||||
|
|
||||||
if err := repo.UpdateLocalCopyBranch(""); err != nil {
|
|
||||||
return nil, err
|
|
||||||
} else if !git.IsBranchExist(repo.LocalCopyPath(), repo.DefaultBranch) {
|
|
||||||
// repo does not have any commits yet, so nothing to update
|
|
||||||
return nil, nil
|
|
||||||
} else if err = repo.UpdateLocalCopyBranch(repo.DefaultBranch); err != nil {
|
|
||||||
return nil, err
|
|
||||||
} else if err = repo.getIndexerStatus(); err != nil {
|
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(repo.IndexerStatus.CommitSha) == 0 {
|
if len(repo.IndexerStatus.CommitSha) == 0 {
|
||||||
return genesisChanges(repo)
|
return genesisChanges(repo, revision)
|
||||||
}
|
}
|
||||||
return nonGenesisChanges(repo)
|
return nonGenesisChanges(repo, revision)
|
||||||
}
|
}
|
||||||
|
|
||||||
func addUpdate(filename string, repo *Repository, batch *indexer.Batch) error {
|
func addUpdate(update fileUpdate, repo *Repository, batch *indexer.Batch) error {
|
||||||
filepath := path.Join(repo.LocalCopyPath(), filename)
|
stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
|
||||||
if stat, err := os.Stat(filepath); err != nil {
|
RunInDir(repo.RepoPath())
|
||||||
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
} else if stat.Size() > setting.Indexer.MaxIndexerFileSize {
|
}
|
||||||
return nil
|
if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil {
|
||||||
} else if stat.IsDir() {
|
return fmt.Errorf("Misformatted git cat-file output: %v", err)
|
||||||
// file could actually be a directory, if it is the root of a submodule.
|
} else if int64(size) > setting.Indexer.MaxIndexerFileSize {
|
||||||
// We do not index submodule contents, so don't do anything.
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
fileContents, err := ioutil.ReadFile(filepath)
|
|
||||||
|
fileContents, err := git.NewCommand("cat-file", "blob", update.BlobSha).
|
||||||
|
RunInDirBytes(repo.RepoPath())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
} else if !base.IsTextFile(fileContents) {
|
} else if !base.IsTextFile(fileContents) {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
return batch.Add(indexer.RepoIndexerUpdate{
|
return batch.Add(indexer.RepoIndexerUpdate{
|
||||||
Filepath: filename,
|
Filepath: update.Filename,
|
||||||
Op: indexer.RepoIndexerOpUpdate,
|
Op: indexer.RepoIndexerOpUpdate,
|
||||||
Data: &indexer.RepoIndexerData{
|
Data: &indexer.RepoIndexerData{
|
||||||
RepoID: repo.ID,
|
RepoID: repo.ID,
|
||||||
|
@ -221,42 +226,76 @@ func addDelete(filename string, repo *Repository, batch *indexer.Batch) error {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// genesisChanges get changes to add repo to the indexer for the first time
|
// parseGitLsTreeOutput parses the output of a `git ls-tree --full-tree` command
|
||||||
func genesisChanges(repo *Repository) (*repoChanges, error) {
|
func parseGitLsTreeOutput(stdout string) ([]fileUpdate, error) {
|
||||||
var changes repoChanges
|
lines := strings.Split(stdout, "\n")
|
||||||
stdout, err := git.NewCommand("ls-files").RunInDir(repo.LocalCopyPath())
|
updates := make([]fileUpdate, 0, len(lines))
|
||||||
if err != nil {
|
for _, line := range lines {
|
||||||
return nil, err
|
// expect line to be "<mode> <object-type> <object-sha>\t<filename>"
|
||||||
}
|
line = strings.TrimSpace(line)
|
||||||
for _, line := range strings.Split(stdout, "\n") {
|
if len(line) == 0 {
|
||||||
filename := strings.TrimSpace(line)
|
|
||||||
if len(filename) == 0 {
|
|
||||||
continue
|
continue
|
||||||
} else if filename[0] == '"' {
|
}
|
||||||
|
firstSpaceIndex := strings.IndexByte(line, ' ')
|
||||||
|
if firstSpaceIndex < 0 {
|
||||||
|
log.Error(4, "Misformatted git ls-tree output: %s", line)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
tabIndex := strings.IndexByte(line, '\t')
|
||||||
|
if tabIndex < 42+firstSpaceIndex || tabIndex == len(line)-1 {
|
||||||
|
log.Error(4, "Misformatted git ls-tree output: %s", line)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if objectType := line[firstSpaceIndex+1 : tabIndex-41]; objectType != "blob" {
|
||||||
|
// submodules appear as commit objects, we do not index submodules
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
blobSha := line[tabIndex-40 : tabIndex]
|
||||||
|
filename := line[tabIndex+1:]
|
||||||
|
if filename[0] == '"' {
|
||||||
|
var err error
|
||||||
filename, err = strconv.Unquote(filename)
|
filename, err = strconv.Unquote(filename)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
changes.UpdatedFiles = append(changes.UpdatedFiles, filename)
|
updates = append(updates, fileUpdate{
|
||||||
|
Filename: filename,
|
||||||
|
BlobSha: blobSha,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
return &changes, nil
|
return updates, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// genesisChanges gets the changes needed to add repo to the indexer for the first time
|
||||||
|
func genesisChanges(repo *Repository, revision string) (*repoChanges, error) {
|
||||||
|
var changes repoChanges
|
||||||
|
stdout, err := git.NewCommand("ls-tree", "--full-tree", "-r", revision).
|
||||||
|
RunInDir(repo.RepoPath())
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
changes.Updates, err = parseGitLsTreeOutput(stdout)
|
||||||
|
return &changes, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// nonGenesisChanges gets the changes made since the previous indexer update
|
// nonGenesisChanges gets the changes made since the previous indexer update
|
||||||
func nonGenesisChanges(repo *Repository) (*repoChanges, error) {
|
func nonGenesisChanges(repo *Repository, revision string) (*repoChanges, error) {
|
||||||
diffCmd := git.NewCommand("diff", "--name-status",
|
diffCmd := git.NewCommand("diff", "--name-status",
|
||||||
repo.IndexerStatus.CommitSha, "HEAD")
|
repo.IndexerStatus.CommitSha, revision)
|
||||||
stdout, err := diffCmd.RunInDir(repo.LocalCopyPath())
|
stdout, err := diffCmd.RunInDir(repo.RepoPath())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// previous commit sha may have been removed by a force push, so
|
// previous commit sha may have been removed by a force push, so
|
||||||
// try rebuilding from scratch
|
// try rebuilding from scratch
|
||||||
|
log.Warn("git diff: %v", err)
|
||||||
if err = indexer.DeleteRepoFromIndexer(repo.ID); err != nil {
|
if err = indexer.DeleteRepoFromIndexer(repo.ID); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
return genesisChanges(repo)
|
return genesisChanges(repo, revision)
|
||||||
}
|
}
|
||||||
var changes repoChanges
|
var changes repoChanges
|
||||||
|
updatedFilenames := make([]string, 0, 10)
|
||||||
for _, line := range strings.Split(stdout, "\n") {
|
for _, line := range strings.Split(stdout, "\n") {
|
||||||
line = strings.TrimSpace(line)
|
line = strings.TrimSpace(line)
|
||||||
if len(line) == 0 {
|
if len(line) == 0 {
|
||||||
|
@ -274,23 +313,22 @@ func nonGenesisChanges(repo *Repository) (*repoChanges, error) {
|
||||||
|
|
||||||
switch status := line[0]; status {
|
switch status := line[0]; status {
|
||||||
case 'M', 'A':
|
case 'M', 'A':
|
||||||
changes.UpdatedFiles = append(changes.UpdatedFiles, filename)
|
updatedFilenames = append(updatedFilenames, filename)
|
||||||
case 'D':
|
case 'D':
|
||||||
changes.RemovedFiles = append(changes.RemovedFiles, filename)
|
changes.RemovedFilenames = append(changes.RemovedFilenames, filename)
|
||||||
default:
|
default:
|
||||||
log.Warn("Unrecognized status: %c (line=%s)", status, line)
|
log.Warn("Unrecognized status: %c (line=%s)", status, line)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return &changes, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func updateLastIndexSync(repo *Repository) error {
|
cmd := git.NewCommand("ls-tree", "--full-tree", revision, "--")
|
||||||
stdout, err := git.NewCommand("rev-parse", "HEAD").RunInDir(repo.LocalCopyPath())
|
cmd.AddArguments(updatedFilenames...)
|
||||||
|
stdout, err = cmd.RunInDir(repo.RepoPath())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
sha := strings.TrimSpace(stdout)
|
changes.Updates, err = parseGitLsTreeOutput(stdout)
|
||||||
return repo.updateIndexerStatus(sha)
|
return &changes, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func processRepoIndexerOperationQueue() {
|
func processRepoIndexerOperationQueue() {
|
||||||
|
|
Loading…
Reference in New Issue
Block a user