Use git log name-status in get last commit (#16059)
* Improve get last commit using git log --name-status. `git log --name-status -c` provides information about the diff between a commit and its parents. Using this and adjusting the algorithm to use the first change to a path allows for a much faster generation of commit info. There is a subtle change in the results generated, but this will cause the results to more closely match those from elsewhere. Signed-off-by: Andrew Thornton <art27@cantab.net> Co-authored-by: 6543 <6543@obermui.de> Co-authored-by: techknowlogick <techknowlogick@gitea.io> Co-authored-by: Lauris BH <lauris@nix.lv>
This commit is contained in:
parent
8fa3bbc424
commit
23358bc55d
40 changed files with 2540 additions and 297 deletions
|
@ -7,15 +7,11 @@
|
|||
package git
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"path"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// GetCommitsInfo gets information of all commits that are corresponding to these entries
|
||||
|
@ -43,21 +39,16 @@ func (tes Entries) GetCommitsInfo(ctx context.Context, commit *Commit, treePath
|
|||
return nil, nil, err
|
||||
}
|
||||
|
||||
for i, found := range commits {
|
||||
if err := cache.Put(commit.ID.String(), path.Join(treePath, unHitPaths[i]), found.ID.String()); err != nil {
|
||||
for pth, found := range commits {
|
||||
if err := cache.Put(commit.ID.String(), path.Join(treePath, pth), found.ID.String()); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
revs[unHitPaths[i]] = found
|
||||
revs[pth] = found
|
||||
}
|
||||
}
|
||||
} else {
|
||||
sort.Strings(entryPaths)
|
||||
revs = map[string]*Commit{}
|
||||
var foundCommits []*Commit
|
||||
foundCommits, err = GetLastCommitForPaths(ctx, commit, treePath, entryPaths)
|
||||
for i, found := range foundCommits {
|
||||
revs[entryPaths[i]] = found
|
||||
}
|
||||
revs, err = GetLastCommitForPaths(ctx, commit, treePath, entryPaths)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
|
@ -86,6 +77,8 @@ func (tes Entries) GetCommitsInfo(ctx context.Context, commit *Commit, treePath
|
|||
subModuleFile := NewSubModuleFile(entryCommit, subModuleURL, entry.ID.String())
|
||||
commitsInfo[i].SubModuleFile = subModuleFile
|
||||
}
|
||||
} else {
|
||||
log("missing commit for %s", entry.Name())
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -125,220 +118,24 @@ func getLastCommitForPathsByCache(ctx context.Context, commitID, treePath string
|
|||
}
|
||||
|
||||
// GetLastCommitForPaths returns last commit information
|
||||
func GetLastCommitForPaths(ctx context.Context, commit *Commit, treePath string, paths []string) ([]*Commit, error) {
|
||||
func GetLastCommitForPaths(ctx context.Context, commit *Commit, treePath string, paths []string) (map[string]*Commit, error) {
|
||||
// We read backwards from the commit to obtain all of the commits
|
||||
|
||||
// We'll do this by using rev-list to provide us with parent commits in order
|
||||
revListReader, revListWriter := io.Pipe()
|
||||
defer func() {
|
||||
_ = revListWriter.Close()
|
||||
_ = revListReader.Close()
|
||||
}()
|
||||
|
||||
go func() {
|
||||
stderr := strings.Builder{}
|
||||
err := NewCommand("rev-list", "--format=%T", commit.ID.String()).SetParentContext(ctx).RunInDirPipeline(commit.repo.Path, revListWriter, &stderr)
|
||||
if err != nil {
|
||||
_ = revListWriter.CloseWithError(ConcatenateError(err, (&stderr).String()))
|
||||
} else {
|
||||
_ = revListWriter.Close()
|
||||
}
|
||||
}()
|
||||
revs, err := WalkGitLog(ctx, commit.repo, commit, treePath, paths...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
batchStdinWriter, batchReader, cancel := commit.repo.CatFileBatch()
|
||||
defer cancel()
|
||||
|
||||
mapsize := 4096
|
||||
if len(paths) > mapsize {
|
||||
mapsize = len(paths)
|
||||
}
|
||||
|
||||
path2idx := make(map[string]int, mapsize)
|
||||
for i, path := range paths {
|
||||
path2idx[path] = i
|
||||
}
|
||||
|
||||
fnameBuf := make([]byte, 4096)
|
||||
modeBuf := make([]byte, 40)
|
||||
|
||||
allShaBuf := make([]byte, (len(paths)+1)*20)
|
||||
shaBuf := make([]byte, 20)
|
||||
tmpTreeID := make([]byte, 40)
|
||||
|
||||
// commits is the returnable commits matching the paths provided
|
||||
commits := make([]string, len(paths))
|
||||
// ids are the blob/tree ids for the paths
|
||||
ids := make([][]byte, len(paths))
|
||||
|
||||
// We'll use a scanner for the revList because it's simpler than a bufio.Reader
|
||||
scan := bufio.NewScanner(revListReader)
|
||||
revListLoop:
|
||||
for scan.Scan() {
|
||||
// Get the next parent commit ID
|
||||
commitID := scan.Text()
|
||||
if !scan.Scan() {
|
||||
break revListLoop
|
||||
}
|
||||
commitID = commitID[7:]
|
||||
rootTreeID := scan.Text()
|
||||
|
||||
// push the tree to the cat-file --batch process
|
||||
_, err := batchStdinWriter.Write([]byte(rootTreeID + "\n"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
currentPath := ""
|
||||
|
||||
// OK if the target tree path is "" and the "" is in the paths just set this now
|
||||
if treePath == "" && paths[0] == "" {
|
||||
// If this is the first time we see this set the id appropriate for this paths to this tree and set the last commit to curCommit
|
||||
if len(ids[0]) == 0 {
|
||||
ids[0] = []byte(rootTreeID)
|
||||
commits[0] = string(commitID)
|
||||
} else if bytes.Equal(ids[0], []byte(rootTreeID)) {
|
||||
commits[0] = string(commitID)
|
||||
}
|
||||
}
|
||||
|
||||
treeReadingLoop:
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return nil, ctx.Err()
|
||||
default:
|
||||
}
|
||||
_, _, size, err := ReadBatchLine(batchReader)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Handle trees
|
||||
|
||||
// n is counter for file position in the tree file
|
||||
var n int64
|
||||
|
||||
// Two options: currentPath is the targetTreepath
|
||||
if treePath == currentPath {
|
||||
// We are in the right directory
|
||||
// Parse each tree line in turn. (don't care about mode here.)
|
||||
for n < size {
|
||||
fname, sha, count, err := ParseTreeLineSkipMode(batchReader, fnameBuf, shaBuf)
|
||||
shaBuf = sha
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
n += int64(count)
|
||||
idx, ok := path2idx[string(fname)]
|
||||
if ok {
|
||||
// Now if this is the first time round set the initial Blob(ish) SHA ID and the commit
|
||||
if len(ids[idx]) == 0 {
|
||||
copy(allShaBuf[20*(idx+1):20*(idx+2)], shaBuf)
|
||||
ids[idx] = allShaBuf[20*(idx+1) : 20*(idx+2)]
|
||||
commits[idx] = string(commitID)
|
||||
} else if bytes.Equal(ids[idx], shaBuf) {
|
||||
commits[idx] = string(commitID)
|
||||
}
|
||||
}
|
||||
// FIXME: is there any order to the way strings are emitted from cat-file?
|
||||
// if there is - then we could skip once we've passed all of our data
|
||||
}
|
||||
if _, err := batchReader.Discard(1); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
break treeReadingLoop
|
||||
}
|
||||
|
||||
var treeID []byte
|
||||
|
||||
// We're in the wrong directory
|
||||
// Find target directory in this directory
|
||||
idx := len(currentPath)
|
||||
if idx > 0 {
|
||||
idx++
|
||||
}
|
||||
target := strings.SplitN(treePath[idx:], "/", 2)[0]
|
||||
|
||||
for n < size {
|
||||
// Read each tree entry in turn
|
||||
mode, fname, sha, count, err := ParseTreeLine(batchReader, modeBuf, fnameBuf, shaBuf)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
n += int64(count)
|
||||
|
||||
// if we have found the target directory
|
||||
if bytes.Equal(fname, []byte(target)) && bytes.Equal(mode, []byte("40000")) {
|
||||
copy(tmpTreeID, sha)
|
||||
treeID = tmpTreeID
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if n < size {
|
||||
// Discard any remaining entries in the current tree
|
||||
discard := size - n
|
||||
for discard > math.MaxInt32 {
|
||||
_, err := batchReader.Discard(math.MaxInt32)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
discard -= math.MaxInt32
|
||||
}
|
||||
_, err := batchReader.Discard(int(discard))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if _, err := batchReader.Discard(1); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// if we haven't found a treeID for the target directory our search is over
|
||||
if len(treeID) == 0 {
|
||||
break treeReadingLoop
|
||||
}
|
||||
|
||||
// add the target to the current path
|
||||
if idx > 0 {
|
||||
currentPath += "/"
|
||||
}
|
||||
currentPath += target
|
||||
|
||||
// if we've now found the current path check its sha id and commit status
|
||||
if treePath == currentPath && paths[0] == "" {
|
||||
if len(ids[0]) == 0 {
|
||||
copy(allShaBuf[0:20], treeID)
|
||||
ids[0] = allShaBuf[0:20]
|
||||
commits[0] = string(commitID)
|
||||
} else if bytes.Equal(ids[0], treeID) {
|
||||
commits[0] = string(commitID)
|
||||
}
|
||||
}
|
||||
treeID = To40ByteSHA(treeID, treeID)
|
||||
_, err = batchStdinWriter.Write(treeID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
_, err = batchStdinWriter.Write([]byte("\n"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
if scan.Err() != nil {
|
||||
return nil, scan.Err()
|
||||
}
|
||||
|
||||
commitsMap := make(map[string]*Commit, len(commits))
|
||||
commitsMap := map[string]*Commit{}
|
||||
commitsMap[commit.ID.String()] = commit
|
||||
|
||||
commitCommits := make([]*Commit, len(commits))
|
||||
for i, commitID := range commits {
|
||||
commitCommits := map[string]*Commit{}
|
||||
for path, commitID := range revs {
|
||||
c, ok := commitsMap[commitID]
|
||||
if ok {
|
||||
commitCommits[i] = c
|
||||
commitCommits[path] = c
|
||||
continue
|
||||
}
|
||||
|
||||
|
@ -364,8 +161,8 @@ revListLoop:
|
|||
if _, err := batchReader.Discard(1); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
commitCommits[i] = c
|
||||
commitCommits[path] = c
|
||||
}
|
||||
|
||||
return commitCommits, scan.Err()
|
||||
return commitCommits, nil
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue