fix: more permissive markup commit hash detection (#6784)

This allows many more variants of commit hashes to be detected and interpreted as link if they are enclosed by up to two different non-word/non-digit characters. I also had in mind RTL languages, where the question mark and similar symbols are on the left of the commit hash.

Resolves #6771

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/6784
Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org>
Reviewed-by: Gusted <gusted@noreply.codeberg.org>
Co-authored-by: Robert Wolff <mahlzahn@posteo.de>
Co-committed-by: Robert Wolff <mahlzahn@posteo.de>
This commit is contained in:
Robert Wolff 2025-02-05 09:04:19 +00:00 committed by Gusted
parent 499497c959
commit 519169ee7b
3 changed files with 17 additions and 5 deletions

View file

@ -1,4 +1,5 @@
// Copyright 2017 The Gitea Authors. All rights reserved. // Copyright 2017 The Gitea Authors. All rights reserved.
// Copyright 2025 The Forgejo Authors.
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
package markup package markup
@ -48,13 +49,13 @@ var (
// hashCurrentPattern matches string that represents a commit SHA, e.g. d8a994ef243349f321568f9e36d5c3f444b99cae // hashCurrentPattern matches string that represents a commit SHA, e.g. d8a994ef243349f321568f9e36d5c3f444b99cae
// Although SHA1 hashes are 40 chars long, SHA256 are 64, the regex matches the hash from 7 to 64 chars in length // Although SHA1 hashes are 40 chars long, SHA256 are 64, the regex matches the hash from 7 to 64 chars in length
// so that abbreviated hash links can be used as well. This matches git and GitHub usability. // so that abbreviated hash links can be used as well. This matches git and GitHub usability.
hashCurrentPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-f]{7,64})(?:\s|$|\)|\]|[.,:](\s|$))`) hashCurrentPattern = regexp.MustCompile(`(?:^|\s)[^\w\d]{0,2}([0-9a-f]{7,64})[^\w\d]{0,2}(?:\s|$)`)
// shortLinkPattern matches short but difficult to parse [[name|link|arg=test]] syntax // shortLinkPattern matches short but difficult to parse [[name|link|arg=test]] syntax
shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`) shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)
// anySHA1Pattern splits url containing SHA into parts // anyHashPattern splits url containing SHA into parts
anyHashPattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40,64})(/[-+~_%.a-zA-Z0-9/]+)?(\?[-+~_%\.a-zA-Z0-9=&]+)?(#[-+~_%.a-zA-Z0-9]+)?`) anyHashPattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{7,64})(/[-+~_%.a-zA-Z0-9/]+)?(\?[-+~_%\.a-zA-Z0-9=&]+)?(#[-+~_%.a-zA-Z0-9]+)?`)
// comparePattern matches "http://domain/org/repo/compare/COMMIT1...COMMIT2#hash" // comparePattern matches "http://domain/org/repo/compare/COMMIT1...COMMIT2#hash"
comparePattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{7,64})(\.\.\.?)([0-9a-f]{7,64})?(#[-+~_%.a-zA-Z0-9]+)?`) comparePattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{7,64})(\.\.\.?)([0-9a-f]{7,64})?(#[-+~_%.a-zA-Z0-9]+)?`)
@ -1174,7 +1175,7 @@ func emojiProcessor(ctx *RenderContext, node *html.Node) {
} }
} }
// hashCurrentPatternProcessor renders SHA1 strings to corresponding links that // hashCurrentPatternProcessor renders SHA1/SHA256 strings to corresponding links that
// are assumed to be in the same repository. // are assumed to be in the same repository.
func hashCurrentPatternProcessor(ctx *RenderContext, node *html.Node) { func hashCurrentPatternProcessor(ctx *RenderContext, node *html.Node) {
if ctx.Metas == nil || ctx.Metas["user"] == "" || ctx.Metas["repo"] == "" || ctx.Metas["repoPath"] == "" { if ctx.Metas == nil || ctx.Metas["user"] == "" || ctx.Metas["repo"] == "" || ctx.Metas["repoPath"] == "" {

View file

@ -1,4 +1,5 @@
// Copyright 2018 The Gitea Authors. All rights reserved. // Copyright 2018 The Gitea Authors. All rights reserved.
// Copyright 2025 The Forgejo Authors.
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
package markup package markup
@ -391,7 +392,7 @@ func TestRender_FullIssueURLs(t *testing.T) {
`<a href="http://localhost:3000/testOrg/testOrgRepo/pulls/2/commits" class="ref-issue">testOrg/testOrgRepo#2/commits</a>`) `<a href="http://localhost:3000/testOrg/testOrgRepo/pulls/2/commits" class="ref-issue">testOrg/testOrgRepo#2/commits</a>`)
} }
func TestRegExp_sha1CurrentPattern(t *testing.T) { func TestRegExp_hashCurrentPattern(t *testing.T) {
trueTestCases := []string{ trueTestCases := []string{
"d8a994ef243349f321568f9e36d5c3f444b99cae", "d8a994ef243349f321568f9e36d5c3f444b99cae",
"abcdefabcdefabcdefabcdefabcdefabcdefabcd", "abcdefabcdefabcdefabcdefabcdefabcdefabcd",
@ -399,6 +400,13 @@ func TestRegExp_sha1CurrentPattern(t *testing.T) {
"[abcdefabcdefabcdefabcdefabcdefabcdefabcd]", "[abcdefabcdefabcdefabcdefabcdefabcdefabcd]",
"abcdefabcdefabcdefabcdefabcdefabcdefabcd.", "abcdefabcdefabcdefabcdefabcdefabcdefabcd.",
"abcdefabcdefabcdefabcdefabcdefabcdefabcd:", "abcdefabcdefabcdefabcdefabcdefabcdefabcd:",
"d8a994ef243349f321568f9e36d5c3f444b99cae12424fa123391042fbae2319",
"abcdefd?",
"abcdefd!",
"!abcd3ef",
":abcd3ef",
".abcd3ef",
" (abcd3ef). ",
} }
falseTestCases := []string{ falseTestCases := []string{
"test", "test",
@ -406,6 +414,8 @@ func TestRegExp_sha1CurrentPattern(t *testing.T) {
"e59ff077-2d03-4e6b-964d-63fbaea81f", "e59ff077-2d03-4e6b-964d-63fbaea81f",
"abcdefghijklmnopqrstuvwxyzabcdefghijklmn", "abcdefghijklmnopqrstuvwxyzabcdefghijklmn",
"abcdefghijklmnopqrstuvwxyzabcdefghijklmO", "abcdefghijklmnopqrstuvwxyzabcdefghijklmO",
"commit/abcdefd",
"abcd3ef...defabcd",
} }
for _, testCase := range trueTestCases { for _, testCase := range trueTestCases {

View file

@ -1,4 +1,5 @@
// Copyright 2017 The Gitea Authors. All rights reserved. // Copyright 2017 The Gitea Authors. All rights reserved.
// Copyright 2025 The Forgejo Authors.
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
package markup_test package markup_test