/** Provides classes for working with HTML documents. */ import javascript module HTML { /** * An HTML file. */ class HtmlFile extends File { HtmlFile() { this.getFileType().isHtml() } } /** * A file that may contain HTML elements. * * This is either an `.html` file or a source code file containing * embedded HTML snippets. */ private class FileContainingHtml extends File { FileContainingHtml() { this.getFileType().isHtml() or // The file contains an expression containing an HTML element exists(Expr e | e.getFile() = this and xml_element_parent_expression(_, e, _) ) } } /** Gets `i`th root node of the HTML fragment embedded in the given expression, if any. */ Element getHtmlElementFromExpr(Expr e, int i) { xml_element_parent_expression(result, e, i) } /** * An HTML element. * * Example: * * ``` * Semmle * ``` */ class Element extends Locatable, @xmlelement { Element() { exists(FileContainingHtml f | xmlElements(this, _, _, _, f)) } override Location getLocation() { xmllocations(this, result) } /** * Gets the name of this HTML element. * * For example, the name of `
` is `br`. */ string getName() { xmlElements(this, result, _, _, _) } /** * Gets the parent element of this element, if any. */ Element getParent() { xmlElements(this, _, result, _, _) } /** * Holds if this is a toplevel element, that is, if it does not have a parent element. */ predicate isTopLevel() { not exists(this.getParent()) } /** * Gets the root HTML document element in which this element is contained. */ DocumentElement getDocument() { result = this.getRoot() } /** * Gets the root element in which this element is contained. */ Element getRoot() { if this.isTopLevel() then result = this else result = this.getParent().getRoot() } /** * Gets the `i`th child element (0-based) of this element. */ Element getChild(int i) { xmlElements(result, _, this, i, _) } /** * Gets a child element of this element. */ Element getChild() { result = this.getChild(_) } /** * Gets the `i`th attribute (0-based) of this element. */ Attribute getAttribute(int i) { xmlAttrs(result, this, _, _, i, _) } /** * Gets an attribute of this element. */ Attribute getAnAttribute() { result = this.getAttribute(_) } /** * Gets an attribute of this element that has the given name. */ Attribute getAttributeByName(string name) { result = this.getAnAttribute() and result.getName() = name } override string toString() { result = "<" + this.getName() + ">..." } override string getAPrimaryQlClass() { result = "HTML::Element" } } /** * An attribute of an HTML element. * * Examples: * * ``` * * target=_blank * >Semmle * ``` */ class Attribute extends Locatable, @xmlattribute { Attribute() { exists(FileContainingHtml f | xmlAttrs(this, _, _, _, _, f)) } override Location getLocation() { xmllocations(this, result) } /** * Gets the inline script of this attribute, if any. */ CodeInAttribute getCodeInAttribute() { toplevel_parent_xml_node(result, this) } /** * Gets the element to which this attribute belongs. */ Element getElement() { xmlAttrs(this, result, _, _, _, _) } /** * Gets the root element in which the element to which this attribute * belongs is contained. */ Element getRoot() { result = this.getElement().getRoot() } /** * Gets the name of this attribute. */ string getName() { xmlAttrs(this, _, result, _, _, _) } /** * Gets the value of this attribute. * * For attributes without an explicitly specified value, the * result is the empty string. */ string getValue() { xmlAttrs(this, _, _, result, _, _) } override string toString() { result = this.getName() + "=" + this.getValue() } override string getAPrimaryQlClass() { result = "HTML::Attribute" } } /** * An HTML `` element. * * Example: * * ``` * * * This is a test. * * * ``` */ class DocumentElement extends Element { DocumentElement() { this.getName() = "html" } } /** * An HTML ` * ``` */ class IframeElement extends Element { IframeElement() { this.getName() = "iframe" } /** * Gets the value of the `src` attribute. */ string getSourcePath() { result = this.getAttributeByName("src").getValue() } } /** * An HTML ` * ``` */ class ScriptElement extends Element { ScriptElement() { this.getName() = "script" } /** * Gets the absolute file system path the value of the `src` attribute * of this script tag resolves to, if any. * * Path resolution is currently limited to absolute `file://` URLs, * absolute file system paths starting with `/`, and paths relative * to the enclosing HTML file. Base URLs are not taken into account. */ string resolveSourcePath() { exists(string path | path = this.getSourcePath() | result = path.regexpCapture("file://(/.*)", 1) or not path.regexpMatch("(\\w+:)?//.*") and result = this.getSourcePath().(ScriptSrcPath).resolve(this.getSearchRoot()).toString() ) } /** * Gets the value of the `src` attribute. */ string getSourcePath() { result = this.getAttributeByName("src").getValue() } /** * Gets the value of the `integrity` attribute. */ string getIntegrityDigest() { result = this.getAttributeByName("integrity").getValue() } /** * Gets the folder relative to which the `src` attribute is resolved. */ Folder getSearchRoot() { if this.getSourcePath().matches("/%") then result.getBaseName() = "" else result = this.getFile().getParentContainer() } /** * Gets the script referred to by the `src` attribute, * if it can be determined. */ Script resolveSource() { result.getFile().getAbsolutePath() = this.resolveSourcePath() } /** * Gets the inline script of this script element, if any. */ private InlineScript getInlineScript() { toplevel_parent_xml_node(result, this) and // the src attribute has precedence not exists(this.getSourcePath()) } /** * Gets the script of this element, if it can be determined. */ Script getScript() { result = this.getInlineScript() or result = this.resolveSource() } override string getAPrimaryQlClass() { result = "HTML::ScriptElement" } } /** * Holds if there is an HTML `