From 8438b893ec3e7afe174d3f1d202ebae81c5d8e5e Mon Sep 17 00:00:00 2001 From: Sauyon Lee Date: Mon, 25 Jan 2021 12:37:07 +0000 Subject: [PATCH 01/11] Add HTML tracing capability --- Makefile | 2 +- codeql-tools/index.cmd | 1 + codeql-tools/index.sh | 1 + codeql-tools/pre-finalize.cmd | 18 + codeql-tools/pre-finalize.sh | 17 + extractor/dbscheme/dbscheme.go | 9 + extractor/dbscheme/tables.go | 79 +++ ql/src/go.dbscheme | 76 ++- ql/src/go.qll | 1 + ql/src/semmle/go/HTML.qll | 214 ++++++++ ql/src/xml.dbscheme | 144 +++++ .../html/htmlelements.expected | 4 + ql/test/extractor-tests/html/htmlelements.ql | 3 + ql/test/extractor-tests/html/main.go | 1 + ql/test/extractor-tests/html/test.html | 11 + .../go.dbscheme | 510 ++++++++++++++++++ .../old.dbscheme | 436 +++++++++++++++ .../upgrade.properties | 2 + 18 files changed, 1527 insertions(+), 2 deletions(-) create mode 100644 codeql-tools/pre-finalize.cmd create mode 100755 codeql-tools/pre-finalize.sh create mode 100644 ql/src/semmle/go/HTML.qll create mode 100644 ql/src/xml.dbscheme create mode 100644 ql/test/extractor-tests/html/htmlelements.expected create mode 100644 ql/test/extractor-tests/html/htmlelements.ql create mode 100644 ql/test/extractor-tests/html/main.go create mode 100644 ql/test/extractor-tests/html/test.html create mode 100644 upgrades/4affa49dbe2bbab1a33f0e3ea6b045116abbcfda/go.dbscheme create mode 100644 upgrades/4affa49dbe2bbab1a33f0e3ea6b045116abbcfda/old.dbscheme create mode 100644 upgrades/4affa49dbe2bbab1a33f0e3ea6b045116abbcfda/upgrade.properties diff --git a/Makefile b/Makefile index 29bb3321927..d4631e810cc 100644 --- a/Makefile +++ b/Makefile @@ -14,7 +14,7 @@ CODEQL_PLATFORM = osx64 endif endif -CODEQL_TOOLS = $(addprefix codeql-tools/,autobuild.cmd autobuild.sh index.cmd index.sh linux64 osx64 win64) +CODEQL_TOOLS = $(addprefix codeql-tools/,autobuild.cmd autobuild.sh pre-finalize.cmd pre-finalize.sh index.cmd index.sh linux64 osx64 win64) EXTRACTOR_PACK_OUT = build/codeql-extractor-go diff 
--git a/codeql-tools/index.cmd b/codeql-tools/index.cmd index 15d7548c1d9..21c8f64df92 100644 --- a/codeql-tools/index.cmd +++ b/codeql-tools/index.cmd @@ -2,6 +2,7 @@ SETLOCAL EnableDelayedExpansion type NUL && "%CODEQL_EXTRACTOR_GO_ROOT%/tools/%CODEQL_PLATFORM%/go-extractor.exe" -mod=vendor ./... +type NUL && "%CODEQL_EXTRACTOR_GO_ROOT%/tools/pre-finalize.cmd" exit /b %ERRORLEVEL% ENDLOCAL diff --git a/codeql-tools/index.sh b/codeql-tools/index.sh index 655fb5eeca3..877400d37f2 100755 --- a/codeql-tools/index.sh +++ b/codeql-tools/index.sh @@ -8,3 +8,4 @@ if [ "$CODEQL_PLATFORM" != "linux64" ] && [ "$CODEQL_PLATFORM" != "osx64" ] ; th fi "$CODEQL_EXTRACTOR_GO_ROOT/tools/$CODEQL_PLATFORM/go-extractor" -mod=vendor ./... +"$CODEQL_EXTRACTOR_GO_ROOT/tools/pre-finalize.sh" diff --git a/codeql-tools/pre-finalize.cmd b/codeql-tools/pre-finalize.cmd new file mode 100644 index 00000000000..c4c2e4a8b07 --- /dev/null +++ b/codeql-tools/pre-finalize.cmd @@ -0,0 +1,18 @@ +@echo off +SETLOCAL EnableDelayedExpansion + +if NOT "%CODEQL_EXTRACTOR_GO_EXTRACT_HTML%"=="no" ( + type NUL && "%CODEQL_DIST%/codeql.exe" database index-files ^ + --include-extension=.htm ^ + --include-extension=.html ^ + --include-extension=.xhtm ^ + --include-extension=.xhtml ^ + --include-extension=.vue ^ + --size-limit 10m ^ + --language html ^ + -- ^ + "%CODEQL_EXTRACTOR_GO_WIP_DATABASE%" ^ + || echo "HTML extraction failed; continuing" + + exit /b %ERRORLEVEL% +) diff --git a/codeql-tools/pre-finalize.sh b/codeql-tools/pre-finalize.sh new file mode 100755 index 00000000000..95a9ffc0644 --- /dev/null +++ b/codeql-tools/pre-finalize.sh @@ -0,0 +1,17 @@ +#!/bin/sh + +set -eu + +if [ "${CODEQL_EXTRACTOR_GO_EXTRACT_HTML:-yes}" != "no" ]; then + "$CODEQL_DIST/codeql" database index-files \ + --include-extension=.htm \ + --include-extension=.html \ + --include-extension=.xhtm \ + --include-extension=.xhtml \ + --include-extension=.vue \ + --size-limit 10m \ + --language html \ + -- \ + 
"$CODEQL_EXTRACTOR_GO_WIP_DATABASE" \ + || echo "HTML extraction failed; continuing." +fi diff --git a/extractor/dbscheme/dbscheme.go b/extractor/dbscheme/dbscheme.go index 0eadc3efd9b..fddaa3e6e1f 100644 --- a/extractor/dbscheme/dbscheme.go +++ b/extractor/dbscheme/dbscheme.go @@ -362,6 +362,15 @@ func NewUnionType(name string, parents ...*UnionType) *UnionType { return tp } +// AddChild adds the type with given `name` to the union type. +// This is useful if a type defined in a snippet should be a child of a type defined in Go. +func (parent *UnionType) AddChild(name string) bool { + tp := &PrimaryKeyType{name} + // don't add tp to types; it's expected that it's already in the db somehow. + parent.components = append(parent.components, tp) + return true +} + // NewAliasType constructs a new alias type with the given `name` that aliases `underlying` func NewAliasType(name string, underlying Type) *AliasType { tp := &AliasType{name, underlying} diff --git a/extractor/dbscheme/tables.go b/extractor/dbscheme/tables.go index 3847324b5db..233f087a2b1 100644 --- a/extractor/dbscheme/tables.go +++ b/extractor/dbscheme/tables.go @@ -44,12 +44,91 @@ snapshotDate(unique date snapshotDate : date ref); sourceLocationPrefix(varchar(900) prefix : string ref); `) +// Copied directly from the XML dbscheme +var xmlSnippet = AddDefaultSnippet(` +/* + * XML Files + */ + +xmlEncoding( + unique int id: @file ref, + string encoding: string ref +); + +xmlDTDs( + unique int id: @xmldtd, + string root: string ref, + string publicId: string ref, + string systemId: string ref, + int fileid: @file ref +); + +xmlElements( + unique int id: @xmlelement, + string name: string ref, + int parentid: @xmlparent ref, + int idx: int ref, + int fileid: @file ref +); + +xmlAttrs( + unique int id: @xmlattribute, + int elementid: @xmlelement ref, + string name: string ref, + string value: string ref, + int idx: int ref, + int fileid: @file ref +); + +xmlNs( + int id: @xmlnamespace, + string prefixName: 
string ref, + string URI: string ref, + int fileid: @file ref +); + +xmlHasNs( + int elementId: @xmlnamespaceable ref, + int nsId: @xmlnamespace ref, + int fileid: @file ref +); + +xmlComments( + unique int id: @xmlcomment, + string text: string ref, + int parentid: @xmlparent ref, + int fileid: @file ref +); + +xmlChars( + unique int id: @xmlcharacters, + string text: string ref, + int parentid: @xmlparent ref, + int idx: int ref, + int isCDATA: int ref, + int fileid: @file ref +); + +@xmlparent = @file | @xmlelement; +@xmlnamespaceable = @xmlelement | @xmlattribute; + +xmllocations( + int xmlElement: @xmllocatable ref, + int location: @location_default ref +); + +@xmllocatable = @xmlcharacters | @xmlelement | @xmlcomment | @xmlattribute | @xmldtd | @file | @xmlnamespace; +`) + // ContainerType is the type of files and folders var ContainerType = NewUnionType("@container") // LocatableType is the type of program entities that have locations var LocatableType = NewUnionType("@locatable") +// Adds xmllocatable as a locatable +var XmlLocatableAsLocatable = LocatableType.AddChild("@xmllocatable") + // NodeType is the type of AST nodes var NodeType = NewUnionType("@node", LocatableType) diff --git a/ql/src/go.dbscheme b/ql/src/go.dbscheme index 4affa49dbe2..2e92b436892 100644 --- a/ql/src/go.dbscheme +++ b/ql/src/go.dbscheme @@ -36,6 +36,80 @@ snapshotDate(unique date snapshotDate : date ref); sourceLocationPrefix(varchar(900) prefix : string ref); + +/* + * XML Files + */ + +xmlEncoding( + unique int id: @file ref, + string encoding: string ref +); + +xmlDTDs( + unique int id: @xmldtd, + string root: string ref, + string publicId: string ref, + string systemId: string ref, + int fileid: @file ref +); + +xmlElements( + unique int id: @xmlelement, + string name: string ref, + int parentid: @xmlparent ref, + int idx: int ref, + int fileid: @file ref +); + +xmlAttrs( + unique int id: @xmlattribute, + int elementid: @xmlelement ref, + string name: string ref, + string 
value: string ref, + int idx: int ref, + int fileid: @file ref +); + +xmlNs( + int id: @xmlnamespace, + string prefixName: string ref, + string URI: string ref, + int fileid: @file ref +); + +xmlHasNs( + int elementId: @xmlnamespaceable ref, + int nsId: @xmlnamespace ref, + int fileid: @file ref +); + +xmlComments( + unique int id: @xmlcomment, + string text: string ref, + int parentid: @xmlparent ref, + int fileid: @file ref +); + +xmlChars( + unique int id: @xmlcharacters, + string text: string ref, + int parentid: @xmlparent ref, + int idx: int ref, + int isCDATA: int ref, + int fileid: @file ref +); + +@xmlparent = @file | @xmlelement; +@xmlnamespaceable = @xmlelement | @xmlattribute; + +xmllocations( + int xmlElement: @xmllocatable ref, + int location: @location_default ref +); + +@xmllocatable = @xmlcharacters | @xmlelement | @xmlcomment | @xmlattribute | @xmldtd | @file | @xmlnamespace; + locations_default(unique int id: @location_default, int file: @file ref, int beginLine: int ref, int beginColumn: int ref, int endLine: int ref, int endColumn: int ref); @@ -133,7 +207,7 @@ has_ellipsis(int id: @callorconversionexpr ref); @container = @file | @folder; -@locatable = @node | @localscope; +@locatable = @xmllocatable | @node | @localscope; @node = @documentable | @exprparent | @modexprparent | @fieldparent | @stmtparent | @declparent | @scopenode | @comment_group | @comment; diff --git a/ql/src/go.qll b/ql/src/go.qll index 1ef249783a4..6797c20a58d 100644 --- a/ql/src/go.qll +++ b/ql/src/go.qll @@ -12,6 +12,7 @@ import semmle.go.Errors import semmle.go.Expr import semmle.go.Files import semmle.go.GoMod +import semmle.go.HTML import semmle.go.Locations import semmle.go.Packages import semmle.go.Scopes diff --git a/ql/src/semmle/go/HTML.qll b/ql/src/semmle/go/HTML.qll new file mode 100644 index 00000000000..82c8724cd4b --- /dev/null +++ b/ql/src/semmle/go/HTML.qll @@ -0,0 +1,214 @@ +/** Provides classes for working with HTML documents. 
*/ + +import go + +module HTML { + /** + * An HTML file. + */ + class HtmlFile extends File { + HtmlFile() { this.getExtension().regexpMatch("x?html?") } + } + + /** + * An HTML element. + * + * Example: + * + * ``` + * <a href="semmle.com">Semmle</a> + * ``` + */ + class Element extends Locatable, @xmlelement { + Element() { exists(HtmlFile f | xmlElements(this, _, _, _, f)) } + + override Location getLocation() { xmllocations(this, result) } + + /** + * Gets the name of this HTML element. + * + * For example, the name of `<br>
` is `br`. + */ + string getName() { xmlElements(this, result, _, _, _) } + + /** + * Gets the parent element of this element, if any. + */ + Element getParent() { xmlElements(this, _, result, _, _) } + + /** + * Holds if this is a toplevel element, that is, if it does not have a parent element. + */ + predicate isTopLevel() { not exists(getParent()) } + + /** + * Gets the root HTML document element in which this element is contained. + */ + DocumentElement getDocument() { result = getRoot() } + + /** + * Gets the root element in which this element is contained. + */ + Element getRoot() { if isTopLevel() then result = this else result = getParent().getRoot() } + + /** + * Gets the `i`th child element (0-based) of this element. + */ + Element getChild(int i) { xmlElements(result, _, this, i, _) } + + /** + * Gets a child element of this element. + */ + Element getChild() { result = getChild(_) } + + /** + * Gets the `i`th attribute (0-based) of this element. + */ + Attribute getAttribute(int i) { xmlAttrs(result, this, _, _, i, _) } + + /** + * Gets an attribute of this element. + */ + Attribute getAnAttribute() { result = getAttribute(_) } + + /** + * Gets an attribute of this element that has the given name. + */ + Attribute getAttributeByName(string name) { + result = getAnAttribute() and + result.getName() = name + } + + /** + * Gets the text node associated with this element. + */ + TextNode getTextNode() { result.getParent() = this } + + override string toString() { result = "<" + getName() + ">..." } + } + + /** + * An attribute of an HTML element. + * + * Examples: + * + * ``` + * + * target=_blank + * >Semmle + * ``` + */ + class Attribute extends Locatable, @xmlattribute { + Attribute() { xmlAttrs(this, _, _, _, _, any(HtmlFile f)) } + + override Location getLocation() { xmllocations(this, result) } + + /** + * Gets the element to which this attribute belongs. 
+ */ + Element getElement() { xmlAttrs(this, result, _, _, _, _) } + + /** + * Gets the root element in which the element to which this attribute + * belongs is contained. + */ + Element getRoot() { result = getElement().getRoot() } + + /** + * Gets the name of this attribute. + */ + string getName() { xmlAttrs(this, _, result, _, _, _) } + + /** + * Gets the value of this attribute. + * + * For attributes without an explicitly specified value, the + * result is the empty string. + */ + string getValue() { xmlAttrs(this, _, _, result, _, _) } + + override string toString() { result = getName() + "=" + getValue() } + } + + /** + * An HTML `` element. + * + * Example: + * + * ``` + * + * + * This is a test. + * + * + * ``` + */ + class DocumentElement extends Element { + DocumentElement() { getName() = "html" } + } + + /** + * An HTML text node. + * + * Example: + * + * ``` + *
<div> + * This text is represented as a text node. + * </div>
+ * ``` + */ + class TextNode extends Locatable, @xmlcharacters { + TextNode() { exists(HtmlFile f | xmlChars(this, _, _, _, _, f)) } + + override string toString() { result = getText() } + + /** + * Gets the content of this text node. + * + * Note that entity expansion has been performed already. + */ + string getText() { xmlChars(this, result, _, _, _, _) } + + /** + * Gets the parent element of this text node. + */ + Element getParent() { xmlChars(this, _, result, _, _, _) } + + /** + * Gets the child index number of this text node. + */ + int getIndex() { xmlChars(this, _, _, result, _, _) } + + /** + * Holds if this text node is inside a `CDATA` tag. + */ + predicate isCData() { xmlChars(this, _, _, _, 1, _) } + + override Location getLocation() { xmllocations(this, result) } + } + + /** + * An HTML comment. + * + * Example: + * + * ``` + * <!-- this is a comment --> + * ``` + */ + class CommentNode extends Locatable, @xmlcomment { + CommentNode() { exists(HtmlFile f | xmlComments(this, _, _, f)) } + + /** Gets the element in which this comment occurs. */ + Element getParent() { xmlComments(this, _, result, _) } + + /** Gets the text of this comment, not including delimiters.
*/ + string getText() { result = toString().regexpCapture("(?s)", 1) } + + override string toString() { xmlComments(this, result, _, _) } + + override Location getLocation() { xmllocations(this, result) } + } +} diff --git a/ql/src/xml.dbscheme b/ql/src/xml.dbscheme new file mode 100644 index 00000000000..8e909080bf3 --- /dev/null +++ b/ql/src/xml.dbscheme @@ -0,0 +1,144 @@ +/* + * External artifacts + */ + +externalData( + int id : @externalDataElement, + string path : string ref, + int column: int ref, + string value : string ref +); + +snapshotDate( + unique date snapshotDate : date ref +); + +sourceLocationPrefix( + string prefix : string ref +); + +/* + * Locations and files + */ + +@location = @location_default ; + +@locatable = @xmllocatable ; + +locations_default( + unique int id: @location_default, + int file: @file ref, + int beginLine: int ref, + int beginColumn: int ref, + int endLine: int ref, + int endColumn: int ref +); + +hasLocation( + int locatableid: @locatable ref, + int id: @location ref +); + +@sourceline = @locatable ; + +#keyset[element_id] +numlines( + int element_id: @sourceline ref, + int num_lines: int ref, + int num_code: int ref, + int num_comment: int ref +); + +files( + unique int id: @file, + string name: string ref, + string simple: string ref, + string ext: string ref, + int fromSource: int ref // deprecated +); + +folders( + unique int id: @folder, + string name: string ref, + string simple: string ref +); + +@container = @folder | @file + +containerparent( + int parent: @container ref, + unique int child: @container ref +); + +/* + * XML Files + */ + +xmlEncoding( + unique int id: @file ref, + string encoding: string ref +); + +xmlDTDs( + unique int id: @xmldtd, + string root: string ref, + string publicId: string ref, + string systemId: string ref, + int fileid: @file ref +); + +xmlElements( + unique int id: @xmlelement, + string name: string ref, + int parentid: @xmlparent ref, + int idx: int ref, + int fileid: @file ref +); + 
+xmlAttrs( + unique int id: @xmlattribute, + int elementid: @xmlelement ref, + string name: string ref, + string value: string ref, + int idx: int ref, + int fileid: @file ref +); + +xmlNs( + int id: @xmlnamespace, + string prefixName: string ref, + string URI: string ref, + int fileid: @file ref +); + +xmlHasNs( + int elementId: @xmlnamespaceable ref, + int nsId: @xmlnamespace ref, + int fileid: @file ref +); + +xmlComments( + unique int id: @xmlcomment, + string text: string ref, + int parentid: @xmlparent ref, + int fileid: @file ref +); + +xmlChars( + unique int id: @xmlcharacters, + string text: string ref, + int parentid: @xmlparent ref, + int idx: int ref, + int isCDATA: int ref, + int fileid: @file ref +); + +@xmlparent = @file | @xmlelement; +@xmlnamespaceable = @xmlelement | @xmlattribute; + +xmllocations( + int xmlElement: @xmllocatable ref, + int location: @location_default ref +); + +@xmllocatable = @xmlcharacters | @xmlelement | @xmlcomment | @xmlattribute | @xmldtd | @file | @xmlnamespace; diff --git a/ql/test/extractor-tests/html/htmlelements.expected b/ql/test/extractor-tests/html/htmlelements.expected new file mode 100644 index 00000000000..8b2fc89ace4 --- /dev/null +++ b/ql/test/extractor-tests/html/htmlelements.expected @@ -0,0 +1,4 @@ +| test.html:2:1:2:23 |

... | +| test.html:3:1:11:5 |