Update to bluemonday-1.0.6 (#15294)

Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
zeripath 2021-04-05 22:38:31 +01:00 committed by GitHub
parent e10d028b03
commit 04196b7658
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 155 additions and 70 deletions

View file

@ -39,7 +39,7 @@ import (
"golang.org/x/net/html"
cssparser "github.com/chris-ramon/douceur/parser"
"github.com/aymerick/douceur/parser"
)
var (
@ -286,7 +286,7 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
case html.StartTagToken:
mostRecentlyStartedToken = strings.ToLower(token.Data)
mostRecentlyStartedToken = normaliseElementName(token.Data)
aps, ok := p.elsAndAttrs[token.Data]
if !ok {
@ -329,7 +329,7 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
case html.EndTagToken:
if mostRecentlyStartedToken == strings.ToLower(token.Data) {
if mostRecentlyStartedToken == normaliseElementName(token.Data) {
mostRecentlyStartedToken = ""
}
@ -407,11 +407,11 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
if !skipElementContent {
switch mostRecentlyStartedToken {
case "script":
case `script`:
// not encouraged, but if a policy allows JavaScript we
// should not HTML escape it as that would break the output
buff.WriteString(token.Data)
case "style":
case `style`:
// not encouraged, but if a policy allows CSS styles we
// should not HTML escape it as that would break the output
buff.WriteString(token.Data)
@ -721,6 +721,26 @@ func (p *Policy) sanitizeAttrs(
}
}
if p.requireCrossOriginAnonymous && len(cleanAttrs) > 0 {
switch elementName {
case "audio", "img", "link", "script", "video":
var crossOriginFound bool
for _, htmlAttr := range cleanAttrs {
if htmlAttr.Key == "crossorigin" {
crossOriginFound = true
htmlAttr.Val = "anonymous"
}
}
if !crossOriginFound {
crossOrigin := html.Attribute{}
crossOrigin.Key = "crossorigin"
crossOrigin.Val = "anonymous"
cleanAttrs = append(cleanAttrs, crossOrigin)
}
}
}
return cleanAttrs
}
@ -744,7 +764,7 @@ func (p *Policy) sanitizeStyles(attr html.Attribute, elementName string) html.At
if len(attr.Val) > 0 && attr.Val[len(attr.Val)-1] != ';' {
attr.Val = attr.Val + ";"
}
decs, err := cssparser.ParseDeclarations(attr.Val)
decs, err := parser.ParseDeclarations(attr.Val)
if err != nil {
attr.Val = ""
return attr
@ -944,3 +964,23 @@ func (p *Policy) matchRegex(elementName string) (map[string]attrPolicy, bool) {
}
return aps, matched
}
// normaliseElementName takes a HTML element like <script> which is user input
// and returns a lower case version of it that is immune to UTF-8 to ASCII
// conversion tricks (like the use of upper case cyrillic i scrİpt which a
// strings.ToLower would convert to script). Instead this func will preserve
// all non-ASCII as their escaped equivalent, i.e. \u0130 which reveals the
// characters when lower cased
func normaliseElementName(str string) string {
// that useful QuoteToASCII put quote marks at the start and end
// so those are trimmed off
return strings.TrimSuffix(
strings.TrimPrefix(
strings.ToLower(
strconv.QuoteToASCII(str),
),
`"`),
`"`,
)
}