Merge different languages for language stats (#24900)

Fix #24896

If users set different languages by `linguist-language`, the `stats` map
could be: `java: 100, Java: 200`.

Language stats are stored as case-insensitive in database and there is a
unique key.

So, the different language names should be merged to one unique name:
`Java: 300`
This commit is contained in:
wxiaoguang 2023-05-25 03:37:36 +08:00 committed by GitHub
parent 63d5e762d8
commit 395bb33e4c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 59 additions and 6 deletions

View file

@ -3,7 +3,46 @@
package git
import (
"strings"
"unicode"
)
const (
fileSizeLimit int64 = 16 * 1024 // 16 KiB
bigFileSize int64 = 1024 * 1024 // 1 MiB
)
// mergeLanguageStats mergers language names with different cases. The name with most upper case letters is used.
func mergeLanguageStats(stats map[string]int64) map[string]int64 {
names := map[string]struct {
uniqueName string
upperCount int
}{}
countUpper := func(s string) (count int) {
for _, r := range s {
if unicode.IsUpper(r) {
count++
}
}
return count
}
for name := range stats {
cnt := countUpper(name)
lower := strings.ToLower(name)
if cnt >= names[lower].upperCount {
names[lower] = struct {
uniqueName string
upperCount int
}{uniqueName: name, upperCount: cnt}
}
}
res := make(map[string]int64, len(names))
for name, num := range stats {
res[names[strings.ToLower(name)].uniqueName] += num
}
return res
}