Fix synchronization bug in repo indexer (#3455)
This commit is contained in:
		
							parent
							
								
									17655cdf1b
								
							
						
					
					
						commit
						b16c84de7b
					
				| @ -5,9 +5,7 @@ | ||||
| package models | ||||
| 
 | ||||
| import ( | ||||
| 	"io/ioutil" | ||||
| 	"os" | ||||
| 	"path" | ||||
| 	"fmt" | ||||
| 	"strconv" | ||||
| 	"strings" | ||||
| 
 | ||||
| @ -16,8 +14,6 @@ import ( | ||||
| 	"code.gitea.io/gitea/modules/indexer" | ||||
| 	"code.gitea.io/gitea/modules/log" | ||||
| 	"code.gitea.io/gitea/modules/setting" | ||||
| 
 | ||||
| 	"github.com/Unknwon/com" | ||||
| ) | ||||
| 
 | ||||
| // RepoIndexerStatus status of a repo's entry in the repo indexer
 | ||||
| @ -132,7 +128,11 @@ func populateRepoIndexer(maxRepoID int64) { | ||||
| } | ||||
| 
 | ||||
| func updateRepoIndexer(repo *Repository) error { | ||||
| 	changes, err := getRepoChanges(repo) | ||||
| 	sha, err := getDefaultBranchSha(repo) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	changes, err := getRepoChanges(repo, sha) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} else if changes == nil { | ||||
| @ -140,12 +140,12 @@ func updateRepoIndexer(repo *Repository) error { | ||||
| 	} | ||||
| 
 | ||||
| 	batch := indexer.RepoIndexerBatch() | ||||
| 	for _, filename := range changes.UpdatedFiles { | ||||
| 		if err := addUpdate(filename, repo, batch); err != nil { | ||||
| 	for _, update := range changes.Updates { | ||||
| 		if err := addUpdate(update, repo, batch); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	for _, filename := range changes.RemovedFiles { | ||||
| 	for _, filename := range changes.RemovedFilenames { | ||||
| 		if err := addDelete(filename, repo, batch); err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| @ -153,56 +153,61 @@ func updateRepoIndexer(repo *Repository) error { | ||||
| 	if err = batch.Flush(); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	return updateLastIndexSync(repo) | ||||
| 	return repo.updateIndexerStatus(sha) | ||||
| } | ||||
| 
 | ||||
| // repoChanges changes (file additions/updates/removals) to a repo
 | ||||
| type repoChanges struct { | ||||
| 	UpdatedFiles []string | ||||
| 	RemovedFiles []string | ||||
| 	Updates          []fileUpdate | ||||
| 	RemovedFilenames []string | ||||
| } | ||||
| 
 | ||||
// fileUpdate pairs a file's path in the repo with the SHA of the git blob
// that holds its contents.
type fileUpdate struct {
	Filename, BlobSha string
}
| 
 | ||||
| func getDefaultBranchSha(repo *Repository) (string, error) { | ||||
| 	stdout, err := git.NewCommand("show-ref", "-s", repo.DefaultBranch).RunInDir(repo.RepoPath()) | ||||
| 	if err != nil { | ||||
| 		return "", err | ||||
| 	} | ||||
| 	return strings.TrimSpace(stdout), nil | ||||
| } | ||||
| 
 | ||||
| // getRepoChanges returns changes to repo since last indexer update
 | ||||
| func getRepoChanges(repo *Repository) (*repoChanges, error) { | ||||
| 	repoWorkingPool.CheckIn(com.ToStr(repo.ID)) | ||||
| 	defer repoWorkingPool.CheckOut(com.ToStr(repo.ID)) | ||||
| 
 | ||||
| 	if err := repo.UpdateLocalCopyBranch(""); err != nil { | ||||
| 		return nil, err | ||||
| 	} else if !git.IsBranchExist(repo.LocalCopyPath(), repo.DefaultBranch) { | ||||
| 		// repo does not have any commits yet, so nothing to update
 | ||||
| 		return nil, nil | ||||
| 	} else if err = repo.UpdateLocalCopyBranch(repo.DefaultBranch); err != nil { | ||||
| 		return nil, err | ||||
| 	} else if err = repo.getIndexerStatus(); err != nil { | ||||
| func getRepoChanges(repo *Repository, revision string) (*repoChanges, error) { | ||||
| 	if err := repo.getIndexerStatus(); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	if len(repo.IndexerStatus.CommitSha) == 0 { | ||||
| 		return genesisChanges(repo) | ||||
| 		return genesisChanges(repo, revision) | ||||
| 	} | ||||
| 	return nonGenesisChanges(repo) | ||||
| 	return nonGenesisChanges(repo, revision) | ||||
| } | ||||
| 
 | ||||
| func addUpdate(filename string, repo *Repository, batch *indexer.Batch) error { | ||||
| 	filepath := path.Join(repo.LocalCopyPath(), filename) | ||||
| 	if stat, err := os.Stat(filepath); err != nil { | ||||
| func addUpdate(update fileUpdate, repo *Repository, batch *indexer.Batch) error { | ||||
| 	stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha). | ||||
| 		RunInDir(repo.RepoPath()) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} else if stat.Size() > setting.Indexer.MaxIndexerFileSize { | ||||
| 		return nil | ||||
| 	} else if stat.IsDir() { | ||||
| 		// file could actually be a directory, if it is the root of a submodule.
 | ||||
| 		// We do not index submodule contents, so don't do anything.
 | ||||
| 	} | ||||
| 	if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil { | ||||
| 		return fmt.Errorf("Misformatted git cat-file output: %v", err) | ||||
| 	} else if int64(size) > setting.Indexer.MaxIndexerFileSize { | ||||
| 		return nil | ||||
| 	} | ||||
| 	fileContents, err := ioutil.ReadFile(filepath) | ||||
| 
 | ||||
| 	fileContents, err := git.NewCommand("cat-file", "blob", update.BlobSha). | ||||
| 		RunInDirBytes(repo.RepoPath()) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 	} else if !base.IsTextFile(fileContents) { | ||||
| 		return nil | ||||
| 	} | ||||
| 	return batch.Add(indexer.RepoIndexerUpdate{ | ||||
| 		Filepath: filename, | ||||
| 		Filepath: update.Filename, | ||||
| 		Op:       indexer.RepoIndexerOpUpdate, | ||||
| 		Data: &indexer.RepoIndexerData{ | ||||
| 			RepoID:  repo.ID, | ||||
| @ -221,42 +226,76 @@ func addDelete(filename string, repo *Repository, batch *indexer.Batch) error { | ||||
| 	}) | ||||
| } | ||||
| 
 | ||||
| // genesisChanges get changes to add repo to the indexer for the first time
 | ||||
| func genesisChanges(repo *Repository) (*repoChanges, error) { | ||||
| 	var changes repoChanges | ||||
| 	stdout, err := git.NewCommand("ls-files").RunInDir(repo.LocalCopyPath()) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	for _, line := range strings.Split(stdout, "\n") { | ||||
| 		filename := strings.TrimSpace(line) | ||||
| 		if len(filename) == 0 { | ||||
| // parseGitLsTreeOutput parses the output of a `git ls-tree -r --full-name` command
 | ||||
| func parseGitLsTreeOutput(stdout string) ([]fileUpdate, error) { | ||||
| 	lines := strings.Split(stdout, "\n") | ||||
| 	updates := make([]fileUpdate, 0, len(lines)) | ||||
| 	for _, line := range lines { | ||||
| 		// expect line to be "<mode> <object-type> <object-sha>\t<filename>"
 | ||||
| 		line = strings.TrimSpace(line) | ||||
| 		if len(line) == 0 { | ||||
| 			continue | ||||
| 		} else if filename[0] == '"' { | ||||
| 		} | ||||
| 		firstSpaceIndex := strings.IndexByte(line, ' ') | ||||
| 		if firstSpaceIndex < 0 { | ||||
| 			log.Error(4, "Misformatted git ls-tree output: %s", line) | ||||
| 			continue | ||||
| 		} | ||||
| 		tabIndex := strings.IndexByte(line, '\t') | ||||
| 		if tabIndex < 42+firstSpaceIndex || tabIndex == len(line)-1 { | ||||
| 			log.Error(4, "Misformatted git ls-tree output: %s", line) | ||||
| 			continue | ||||
| 		} | ||||
| 		if objectType := line[firstSpaceIndex+1 : tabIndex-41]; objectType != "blob" { | ||||
| 			// submodules appear as commit objects, we do not index submodules
 | ||||
| 			continue | ||||
| 		} | ||||
| 
 | ||||
| 		blobSha := line[tabIndex-40 : tabIndex] | ||||
| 		filename := line[tabIndex+1:] | ||||
| 		if filename[0] == '"' { | ||||
| 			var err error | ||||
| 			filename, err = strconv.Unquote(filename) | ||||
| 			if err != nil { | ||||
| 				return nil, err | ||||
| 			} | ||||
| 		} | ||||
| 		changes.UpdatedFiles = append(changes.UpdatedFiles, filename) | ||||
| 		updates = append(updates, fileUpdate{ | ||||
| 			Filename: filename, | ||||
| 			BlobSha:  blobSha, | ||||
| 		}) | ||||
| 	} | ||||
| 	return &changes, nil | ||||
| 	return updates, nil | ||||
| } | ||||
| 
 | ||||
| // genesisChanges get changes to add repo to the indexer for the first time
 | ||||
| func genesisChanges(repo *Repository, revision string) (*repoChanges, error) { | ||||
| 	var changes repoChanges | ||||
| 	stdout, err := git.NewCommand("ls-tree", "--full-tree", "-r", revision). | ||||
| 		RunInDir(repo.RepoPath()) | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	changes.Updates, err = parseGitLsTreeOutput(stdout) | ||||
| 	return &changes, err | ||||
| } | ||||
| 
 | ||||
| // nonGenesisChanges gets the changes made since the previous indexer update
 | ||||
| func nonGenesisChanges(repo *Repository) (*repoChanges, error) { | ||||
| func nonGenesisChanges(repo *Repository, revision string) (*repoChanges, error) { | ||||
| 	diffCmd := git.NewCommand("diff", "--name-status", | ||||
| 		repo.IndexerStatus.CommitSha, "HEAD") | ||||
| 	stdout, err := diffCmd.RunInDir(repo.LocalCopyPath()) | ||||
| 		repo.IndexerStatus.CommitSha, revision) | ||||
| 	stdout, err := diffCmd.RunInDir(repo.RepoPath()) | ||||
| 	if err != nil { | ||||
| 		// previous commit sha may have been removed by a force push, so
 | ||||
| 		// try rebuilding from scratch
 | ||||
| 		log.Warn("git diff: %v", err) | ||||
| 		if err = indexer.DeleteRepoFromIndexer(repo.ID); err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		return genesisChanges(repo) | ||||
| 		return genesisChanges(repo, revision) | ||||
| 	} | ||||
| 	var changes repoChanges | ||||
| 	updatedFilenames := make([]string, 0, 10) | ||||
| 	for _, line := range strings.Split(stdout, "\n") { | ||||
| 		line = strings.TrimSpace(line) | ||||
| 		if len(line) == 0 { | ||||
| @ -274,23 +313,22 @@ func nonGenesisChanges(repo *Repository) (*repoChanges, error) { | ||||
| 
 | ||||
| 		switch status := line[0]; status { | ||||
| 		case 'M', 'A': | ||||
| 			changes.UpdatedFiles = append(changes.UpdatedFiles, filename) | ||||
| 			updatedFilenames = append(updatedFilenames, filename) | ||||
| 		case 'D': | ||||
| 			changes.RemovedFiles = append(changes.RemovedFiles, filename) | ||||
| 			changes.RemovedFilenames = append(changes.RemovedFilenames, filename) | ||||
| 		default: | ||||
| 			log.Warn("Unrecognized status: %c (line=%s)", status, line) | ||||
| 		} | ||||
| 	} | ||||
| 	return &changes, nil | ||||
| } | ||||
| 
 | ||||
| func updateLastIndexSync(repo *Repository) error { | ||||
| 	stdout, err := git.NewCommand("rev-parse", "HEAD").RunInDir(repo.LocalCopyPath()) | ||||
| 	cmd := git.NewCommand("ls-tree", "--full-tree", revision, "--") | ||||
| 	cmd.AddArguments(updatedFilenames...) | ||||
| 	stdout, err = cmd.RunInDir(repo.RepoPath()) | ||||
| 	if err != nil { | ||||
| 		return err | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	sha := strings.TrimSpace(stdout) | ||||
| 	return repo.updateIndexerStatus(sha) | ||||
| 	changes.Updates, err = parseGitLsTreeOutput(stdout) | ||||
| 	return &changes, err | ||||
| } | ||||
| 
 | ||||
| func processRepoIndexerOperationQueue() { | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user