Merge different languages for language stats (#24900)

Fix #24896

If users set different languages by `linguist-language`, the `stats` map
could be: `java: 100, Java: 200`.

Language stats are stored as case-insensitive in database and there is a
unique key.

So, the different language names should be merged to one unique name:
`Java: 300`
This commit is contained in:
wxiaoguang 2023-05-25 03:37:36 +08:00 committed by GitHub
parent 63d5e762d8
commit 395bb33e4c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 59 additions and 6 deletions

View file

@ -180,7 +180,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
// FIXME: Why can't we split this and the IsGenerated tests to avoid reading the blob unless absolutely necessary?
// - eg. do the all the detection tests using filename first before reading content.
language := analyze.GetCodeLanguage(f.Name(), content)
if language == enry.OtherLanguage || language == "" {
if language == "" {
continue
}
@ -192,8 +192,8 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
included, checked := includedLanguage[language]
if !checked {
langtype := enry.GetLanguageType(language)
included = langtype == enry.Programming || langtype == enry.Markup
langType := enry.GetLanguageType(language)
included = langType == enry.Programming || langType == enry.Markup
includedLanguage[language] = included
}
if included {
@ -210,7 +210,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
sizes[firstExcludedLanguage] = firstExcludedLanguageSize
}
return sizes, nil
return mergeLanguageStats(sizes), nil
}
func discardFull(rd *bufio.Reader, discard int64) error {