JS: Restrict names of extracted HTML attributes

This commit is contained in:
Asger Feldthaus
2021-02-19 23:28:28 +00:00
parent d490bea9a9
commit e9c0f170a1

View File

@@ -143,6 +143,18 @@ public class HTMLExtractor implements IExtractor {
}
}
}
/**
 * Reports whether every attribute name on {@code element} is well-formed.
 *
 * @param element the element whose attributes are inspected
 * @return {@code false} if the element yields no attribute collection, or if any attribute name
 *     fails to fully match {@code VALID_ATTRIBUTE_NAME}; {@code true} otherwise
 */
@Override
public boolean shouldExtractAttributes(Element element) {
  Attributes elementAttributes = element.getAttributes();
  if (elementAttributes == null) {
    return false;
  }
  for (Attribute candidate : elementAttributes) {
    boolean nameIsValid = VALID_ATTRIBUTE_NAME.matcher(candidate.getName()).matches();
    if (!nameIsValid) {
      // One malformed name is enough to reject the whole element.
      return false;
    }
  }
  return true;
}
}
private boolean isAngularTemplateAttributeName(String name) {
@@ -153,6 +165,8 @@ public class HTMLExtractor implements IExtractor {
/**
 * Matches a loop declaration of the form {@code let <name> of <rest>}, allowing leading spaces.
 * Group 1 captures the declared name (word characters only); group 2 captures everything after
 * {@code of}, which must be followed either by spaces or by a non-word character.
 */
private static final Pattern ANGULAR_FOR_LOOP_DECL = Pattern.compile("^ *let +(\\w+) +of(?: +|(?!\\w))(.*)");

/**
 * Shape of an acceptable attribute name: an optional {@code *}, an optional {@code [}, an optional
 * {@code (}, one or more word characters, colons or hyphens, then an optional {@code )} and an
 * optional {@code ]}.
 *
 * <p>The closing delimiters are listed in the reverse order of the opening ones so that a
 * well-nested name such as {@code [(ngModel)]} matches in full; with the closers in the same order
 * as the openers, that name would be rejected while the mis-nested {@code [(x])} would be accepted.
 */
private static final Pattern VALID_ATTRIBUTE_NAME = Pattern.compile("\\*?\\[?\\(?[\\w:_\\-]+\\)?\\]?");
/** List of HTML attributes whose value is interpreted as JavaScript. */
private static final Pattern JS_ATTRIBUTE =
Pattern.compile(