From 519169ee7b6ad95654ed6e098f5de4ff4ecd2063 Mon Sep 17 00:00:00 2001 From: Robert Wolff Date: Wed, 5 Feb 2025 09:04:19 +0000 Subject: [PATCH] fix: more permissive markup commit hash detection (#6784) This allows many more variants of commit hashes to be detected and interpreted as links if they are enclosed by up to two different non-word/non-digit characters. I also had in mind RTL languages, where the question mark and similar symbols are on the left of the commit hash. Resolves #6771 Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/6784 Reviewed-by: Earl Warren Reviewed-by: Gusted Co-authored-by: Robert Wolff Co-committed-by: Robert Wolff --- modules/markup/html.go | 9 +++++---- modules/markup/html_internal_test.go | 12 +++++++++++- modules/markup/html_test.go | 1 + 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/modules/markup/html.go b/modules/markup/html.go index ca9857d2bf..936d717903 100644 --- a/modules/markup/html.go +++ b/modules/markup/html.go @@ -1,4 +1,5 @@ // Copyright 2017 The Gitea Authors. All rights reserved. +// Copyright 2025 The Forgejo Authors. // SPDX-License-Identifier: MIT package markup @@ -48,13 +49,13 @@ var ( // hashCurrentPattern matches string that represents a commit SHA, e.g. d8a994ef243349f321568f9e36d5c3f444b99cae // Although SHA1 hashes are 40 chars long, SHA256 are 64, the regex matches the hash from 7 to 64 chars in length // so that abbreviated hash links can be used as well. This matches git and GitHub usability. 
- hashCurrentPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-f]{7,64})(?:\s|$|\)|\]|[.,:](\s|$))`) + hashCurrentPattern = regexp.MustCompile(`(?:^|\s)[^\w\d]{0,2}([0-9a-f]{7,64})[^\w\d]{0,2}(?:\s|$)`) // shortLinkPattern matches short but difficult to parse [[name|link|arg=test]] syntax shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`) - // anySHA1Pattern splits url containing SHA into parts - anyHashPattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40,64})(/[-+~_%.a-zA-Z0-9/]+)?(\?[-+~_%\.a-zA-Z0-9=&]+)?(#[-+~_%.a-zA-Z0-9]+)?`) + // anyHashPattern splits url containing SHA into parts + anyHashPattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{7,64})(/[-+~_%.a-zA-Z0-9/]+)?(\?[-+~_%\.a-zA-Z0-9=&]+)?(#[-+~_%.a-zA-Z0-9]+)?`) // comparePattern matches "http://domain/org/repo/compare/COMMIT1...COMMIT2#hash" comparePattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{7,64})(\.\.\.?)([0-9a-f]{7,64})?(#[-+~_%.a-zA-Z0-9]+)?`) @@ -1174,7 +1175,7 @@ func emojiProcessor(ctx *RenderContext, node *html.Node) { } } -// hashCurrentPatternProcessor renders SHA1 strings to corresponding links that +// hashCurrentPatternProcessor renders SHA1/SHA256 strings to corresponding links that // are assumed to be in the same repository. func hashCurrentPatternProcessor(ctx *RenderContext, node *html.Node) { if ctx.Metas == nil || ctx.Metas["user"] == "" || ctx.Metas["repo"] == "" || ctx.Metas["repoPath"] == "" { diff --git a/modules/markup/html_internal_test.go b/modules/markup/html_internal_test.go index a72be9f8cf..71be75f6bd 100644 --- a/modules/markup/html_internal_test.go +++ b/modules/markup/html_internal_test.go @@ -1,4 +1,5 @@ // Copyright 2018 The Gitea Authors. All rights reserved. +// Copyright 2025 The Forgejo Authors. 
// SPDX-License-Identifier: MIT package markup @@ -391,7 +392,7 @@ func TestRender_FullIssueURLs(t *testing.T) { `testOrg/testOrgRepo#2/commits`) } -func TestRegExp_sha1CurrentPattern(t *testing.T) { +func TestRegExp_hashCurrentPattern(t *testing.T) { trueTestCases := []string{ "d8a994ef243349f321568f9e36d5c3f444b99cae", "abcdefabcdefabcdefabcdefabcdefabcdefabcd", @@ -399,6 +400,13 @@ func TestRegExp_sha1CurrentPattern(t *testing.T) { "[abcdefabcdefabcdefabcdefabcdefabcdefabcd]", "abcdefabcdefabcdefabcdefabcdefabcdefabcd.", "abcdefabcdefabcdefabcdefabcdefabcdefabcd:", + "d8a994ef243349f321568f9e36d5c3f444b99cae12424fa123391042fbae2319", + "abcdefd?", + "abcdefd!", + "!abcd3ef", + ":abcd3ef", + ".abcd3ef", + " (abcd3ef). ", } falseTestCases := []string{ "test", @@ -406,6 +414,8 @@ func TestRegExp_sha1CurrentPattern(t *testing.T) { "e59ff077-2d03-4e6b-964d-63fbaea81f", "abcdefghijklmnopqrstuvwxyzabcdefghijklmn", "abcdefghijklmnopqrstuvwxyzabcdefghijklmO", + "commit/abcdefd", + "abcd3ef...defabcd", } for _, testCase := range trueTestCases { diff --git a/modules/markup/html_test.go b/modules/markup/html_test.go index 6c00c5e7c8..7d954d2a16 100644 --- a/modules/markup/html_test.go +++ b/modules/markup/html_test.go @@ -1,4 +1,5 @@ // Copyright 2017 The Gitea Authors. All rights reserved. +// Copyright 2025 The Forgejo Authors. // SPDX-License-Identifier: MIT package markup_test