Use git log name-status in get last commit (#16059)

* Improve get last commit using git log --name-status

git log --name-status -c provides information about the diff between a
commit and its parents. Using this and adjusting the algorithm to use
the first change to a path allows for a much faster generation of commit
info.

There is a subtle change in the results generated but this will cause
the results to more closely match those from elsewhere.

Signed-off-by: Andrew Thornton <art27@cantab.net>

Co-authored-by: 6543 <6543@obermui.de>
Co-authored-by: techknowlogick <techknowlogick@gitea.io>
Co-authored-by: Lauris BH <lauris@nix.lv>
This commit is contained in:
zeripath 2021-06-20 23:00:46 +01:00 committed by GitHub
parent 8fa3bbc424
commit 23358bc55d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
40 changed files with 2540 additions and 297 deletions

View file

@ -7,15 +7,11 @@
package git
import (
"bufio"
"bytes"
"context"
"fmt"
"io"
"math"
"path"
"sort"
"strings"
)
// GetCommitsInfo gets information of all commits that are corresponding to these entries
@ -43,21 +39,16 @@ func (tes Entries) GetCommitsInfo(ctx context.Context, commit *Commit, treePath
return nil, nil, err
}
for i, found := range commits {
if err := cache.Put(commit.ID.String(), path.Join(treePath, unHitPaths[i]), found.ID.String()); err != nil {
for pth, found := range commits {
if err := cache.Put(commit.ID.String(), path.Join(treePath, pth), found.ID.String()); err != nil {
return nil, nil, err
}
revs[unHitPaths[i]] = found
revs[pth] = found
}
}
} else {
sort.Strings(entryPaths)
revs = map[string]*Commit{}
var foundCommits []*Commit
foundCommits, err = GetLastCommitForPaths(ctx, commit, treePath, entryPaths)
for i, found := range foundCommits {
revs[entryPaths[i]] = found
}
revs, err = GetLastCommitForPaths(ctx, commit, treePath, entryPaths)
}
if err != nil {
return nil, nil, err
@ -86,6 +77,8 @@ func (tes Entries) GetCommitsInfo(ctx context.Context, commit *Commit, treePath
subModuleFile := NewSubModuleFile(entryCommit, subModuleURL, entry.ID.String())
commitsInfo[i].SubModuleFile = subModuleFile
}
} else {
log("missing commit for %s", entry.Name())
}
}
@ -125,220 +118,24 @@ func getLastCommitForPathsByCache(ctx context.Context, commitID, treePath string
}
// GetLastCommitForPaths returns last commit information
func GetLastCommitForPaths(ctx context.Context, commit *Commit, treePath string, paths []string) ([]*Commit, error) {
func GetLastCommitForPaths(ctx context.Context, commit *Commit, treePath string, paths []string) (map[string]*Commit, error) {
// We read backwards from the commit to obtain all of the commits
// We'll do this by using rev-list to provide us with parent commits in order
revListReader, revListWriter := io.Pipe()
defer func() {
_ = revListWriter.Close()
_ = revListReader.Close()
}()
go func() {
stderr := strings.Builder{}
err := NewCommand("rev-list", "--format=%T", commit.ID.String()).SetParentContext(ctx).RunInDirPipeline(commit.repo.Path, revListWriter, &stderr)
if err != nil {
_ = revListWriter.CloseWithError(ConcatenateError(err, (&stderr).String()))
} else {
_ = revListWriter.Close()
}
}()
revs, err := WalkGitLog(ctx, commit.repo, commit, treePath, paths...)
if err != nil {
return nil, err
}
batchStdinWriter, batchReader, cancel := commit.repo.CatFileBatch()
defer cancel()
mapsize := 4096
if len(paths) > mapsize {
mapsize = len(paths)
}
path2idx := make(map[string]int, mapsize)
for i, path := range paths {
path2idx[path] = i
}
fnameBuf := make([]byte, 4096)
modeBuf := make([]byte, 40)
allShaBuf := make([]byte, (len(paths)+1)*20)
shaBuf := make([]byte, 20)
tmpTreeID := make([]byte, 40)
// commits is the returnable commits matching the paths provided
commits := make([]string, len(paths))
// ids are the blob/tree ids for the paths
ids := make([][]byte, len(paths))
// We'll use a scanner for the revList because it's simpler than a bufio.Reader
scan := bufio.NewScanner(revListReader)
revListLoop:
for scan.Scan() {
// Get the next parent commit ID
commitID := scan.Text()
if !scan.Scan() {
break revListLoop
}
commitID = commitID[7:]
rootTreeID := scan.Text()
// push the tree to the cat-file --batch process
_, err := batchStdinWriter.Write([]byte(rootTreeID + "\n"))
if err != nil {
return nil, err
}
currentPath := ""
// OK if the target tree path is "" and the "" is in the paths just set this now
if treePath == "" && paths[0] == "" {
// If this is the first time we see this set the id appropriate for this paths to this tree and set the last commit to curCommit
if len(ids[0]) == 0 {
ids[0] = []byte(rootTreeID)
commits[0] = string(commitID)
} else if bytes.Equal(ids[0], []byte(rootTreeID)) {
commits[0] = string(commitID)
}
}
treeReadingLoop:
for {
select {
case <-ctx.Done():
return nil, ctx.Err()
default:
}
_, _, size, err := ReadBatchLine(batchReader)
if err != nil {
return nil, err
}
// Handle trees
// n is counter for file position in the tree file
var n int64
// Two options: currentPath is the targetTreepath
if treePath == currentPath {
// We are in the right directory
// Parse each tree line in turn. (don't care about mode here.)
for n < size {
fname, sha, count, err := ParseTreeLineSkipMode(batchReader, fnameBuf, shaBuf)
shaBuf = sha
if err != nil {
return nil, err
}
n += int64(count)
idx, ok := path2idx[string(fname)]
if ok {
// Now if this is the first time round set the initial Blob(ish) SHA ID and the commit
if len(ids[idx]) == 0 {
copy(allShaBuf[20*(idx+1):20*(idx+2)], shaBuf)
ids[idx] = allShaBuf[20*(idx+1) : 20*(idx+2)]
commits[idx] = string(commitID)
} else if bytes.Equal(ids[idx], shaBuf) {
commits[idx] = string(commitID)
}
}
// FIXME: is there any order to the way strings are emitted from cat-file?
// if there is - then we could skip once we've passed all of our data
}
if _, err := batchReader.Discard(1); err != nil {
return nil, err
}
break treeReadingLoop
}
var treeID []byte
// We're in the wrong directory
// Find target directory in this directory
idx := len(currentPath)
if idx > 0 {
idx++
}
target := strings.SplitN(treePath[idx:], "/", 2)[0]
for n < size {
// Read each tree entry in turn
mode, fname, sha, count, err := ParseTreeLine(batchReader, modeBuf, fnameBuf, shaBuf)
if err != nil {
return nil, err
}
n += int64(count)
// if we have found the target directory
if bytes.Equal(fname, []byte(target)) && bytes.Equal(mode, []byte("40000")) {
copy(tmpTreeID, sha)
treeID = tmpTreeID
break
}
}
if n < size {
// Discard any remaining entries in the current tree
discard := size - n
for discard > math.MaxInt32 {
_, err := batchReader.Discard(math.MaxInt32)
if err != nil {
return nil, err
}
discard -= math.MaxInt32
}
_, err := batchReader.Discard(int(discard))
if err != nil {
return nil, err
}
}
if _, err := batchReader.Discard(1); err != nil {
return nil, err
}
// if we haven't found a treeID for the target directory our search is over
if len(treeID) == 0 {
break treeReadingLoop
}
// add the target to the current path
if idx > 0 {
currentPath += "/"
}
currentPath += target
// if we've now found the current path check its sha id and commit status
if treePath == currentPath && paths[0] == "" {
if len(ids[0]) == 0 {
copy(allShaBuf[0:20], treeID)
ids[0] = allShaBuf[0:20]
commits[0] = string(commitID)
} else if bytes.Equal(ids[0], treeID) {
commits[0] = string(commitID)
}
}
treeID = To40ByteSHA(treeID, treeID)
_, err = batchStdinWriter.Write(treeID)
if err != nil {
return nil, err
}
_, err = batchStdinWriter.Write([]byte("\n"))
if err != nil {
return nil, err
}
}
}
if scan.Err() != nil {
return nil, scan.Err()
}
commitsMap := make(map[string]*Commit, len(commits))
commitsMap := map[string]*Commit{}
commitsMap[commit.ID.String()] = commit
commitCommits := make([]*Commit, len(commits))
for i, commitID := range commits {
commitCommits := map[string]*Commit{}
for path, commitID := range revs {
c, ok := commitsMap[commitID]
if ok {
commitCommits[i] = c
commitCommits[path] = c
continue
}
@ -364,8 +161,8 @@ revListLoop:
if _, err := batchReader.Discard(1); err != nil {
return nil, err
}
commitCommits[i] = c
commitCommits[path] = c
}
return commitCommits, scan.Err()
return commitCommits, nil
}