diff --git a/javascript/extractor/src/com/semmle/js/extractor/ASTExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/ASTExtractor.java index e337f2efd8e..2b448ac54c5 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/ASTExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/ASTExtractor.java @@ -177,14 +177,22 @@ public class ASTExtractor { this.scopeManager = scopeManager; this.lexicalExtractor = lexicalExtractor; this.regexpExtractor = new RegExpExtractor(trapwriter, locationManager); - this.toplevelLabel = - trapwriter.globalID( - "script;{" - + locationManager.getFileLabel() - + "}," - + locationManager.getStartLine() - + ',' - + locationManager.getStartColumn()); + this.toplevelLabel = makeTopLevelLabel(trapwriter, locationManager.getFileLabel(), locationManager.getStartLine(), locationManager.getStartColumn()); + } + + /** + * Returns the label for the top-level starting at the given location. + *
<p>
+ * May be used to refer to the top-level before it has been extracted. + */ + public static Label makeTopLevelLabel(TrapWriter trapWriter, Label fileLabel, int startLine, int startColumn) { + return trapWriter.globalID( + "script;{" + + fileLabel + + "}," + + startLine + + ',' + + startColumn); } public TrapWriter getTrapwriter() { diff --git a/javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java index bd5f7d33de6..f04ea325528 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java @@ -10,9 +10,11 @@ import com.semmle.js.extractor.ExtractorConfig.Platform; import com.semmle.js.extractor.ExtractorConfig.SourceType; import com.semmle.js.parser.ParseError; import com.semmle.util.data.Option; +import com.semmle.util.data.Pair; import com.semmle.util.data.StringUtil; import com.semmle.util.io.WholeIO; import com.semmle.util.trap.TrapWriter; +import com.semmle.util.trap.TrapWriter.Label; import net.htmlparser.jericho.Attribute; import net.htmlparser.jericho.Attributes; @@ -23,16 +25,15 @@ import net.htmlparser.jericho.Segment; /** Extractor for handling HTML and XHTML files. */ public class HTMLExtractor implements IExtractor { + private LoCInfo locInfo = new LoCInfo(0, 0); + private class JavaScriptHTMLElementHandler implements HtmlPopulator.ElementHandler { private final ScopeManager scopeManager; private final TextualExtractor textualExtractor; - private LoCInfo locInfo; - + public JavaScriptHTMLElementHandler(TextualExtractor textualExtractor) { this.textualExtractor = textualExtractor; - this.locInfo = new LoCInfo(0, 0); - this.scopeManager = new ScopeManager(textualExtractor.getTrapwriter(), config.getEcmaVersion()); } @@ -42,8 +43,7 @@ public class HTMLExtractor implements IExtractor { * attribute values. 
*/ @Override - public void handleElement(Element elt) { - LoCInfo snippetLoC = null; + public void handleElement(Element elt, HtmlPopulator.Context context) { if (elt.getName().equals(HTMLElementName.SCRIPT)) { SourceType sourceType = getScriptSourceType(elt, textualExtractor.getExtractedFile()); if (sourceType != null) { @@ -72,16 +72,17 @@ public class HTMLExtractor implements IExtractor { source = source.replace("", " "); if (!source.trim().isEmpty()) { RowColumnVector contentStart = content.getRowColumnVector(); - snippetLoC = - extractSnippet( - TopLevelKind.inlineScript, - config.withSourceType(sourceType), - scopeManager, - textualExtractor, - source, - contentStart.getRow(), - contentStart.getColumn(), - isTypeScript); + extractSnippet( + TopLevelKind.inlineScript, + config.withSourceType(sourceType), + scopeManager, + textualExtractor, + source, + contentStart.getRow(), + contentStart.getColumn(), + isTypeScript, + elt, + context); } } } else { @@ -95,16 +96,17 @@ public class HTMLExtractor implements IExtractor { String source = attr.getValue(); RowColumnVector valueStart = attr.getValueSegment().getRowColumnVector(); if (JS_ATTRIBUTE.matcher(attr.getName()).matches()) { - snippetLoC = - extractSnippet( - TopLevelKind.eventHandler, - config, - scopeManager, - textualExtractor, - source, - valueStart.getRow(), - valueStart.getColumn(), - false /* isTypeScript */); + extractSnippet( + TopLevelKind.eventHandler, + config, + scopeManager, + textualExtractor, + source, + valueStart.getRow(), + valueStart.getColumn(), + false /* isTypeScript */, + attr, + context); } else if (isAngularTemplateAttributeName(attr.getName())) { // For an attribute *ngFor="let var of EXPR", start parsing at EXPR int offset = 0; @@ -116,37 +118,33 @@ public class HTMLExtractor implements IExtractor { source = expr; } } - snippetLoC = - extractSnippet( - TopLevelKind.eventHandler, - config.withSourceType(SourceType.ANGULAR_TEMPLATE), - scopeManager, - textualExtractor, - source, - 
valueStart.getRow(), - valueStart.getColumn() + offset, - false /* isTypeScript */); + extractSnippet( + TopLevelKind.eventHandler, + config.withSourceType(SourceType.ANGULAR_TEMPLATE), + scopeManager, + textualExtractor, + source, + valueStart.getRow(), + valueStart.getColumn() + offset, + false /* isTypeScript */, + attr, + context); } else if (source.startsWith("javascript:")) { source = source.substring(11); - snippetLoC = - extractSnippet( - TopLevelKind.javascriptUrl, - config, - scopeManager, - textualExtractor, - source, - valueStart.getRow(), - valueStart.getColumn() + 11, - false /* isTypeScript */); + extractSnippet( + TopLevelKind.javascriptUrl, + config, + scopeManager, + textualExtractor, + source, + valueStart.getRow(), + valueStart.getColumn() + 11, + false /* isTypeScript */, + attr, + context); } } } - - if (snippetLoC != null) locInfo.add(snippetLoC); - } - - public LoCInfo getLoCInfo() { - return this.locInfo; } } @@ -202,7 +200,7 @@ public class HTMLExtractor implements IExtractor { extractor.doit(Option.some(eltHandler)); - return eltHandler.getLoCInfo(); + return locInfo; } /** @@ -270,7 +268,7 @@ public class HTMLExtractor implements IExtractor { return val == null ? 
val : StringUtil.lc(val); } - private LoCInfo extractSnippet( + private void extractSnippet( TopLevelKind toplevelKind, ExtractorConfig config, ScopeManager scopeManager, @@ -278,10 +276,18 @@ public class HTMLExtractor implements IExtractor { String source, int line, int column, - boolean isTypeScript) { + boolean isTypeScript, + Segment parentHtmlNode, + HtmlPopulator.Context context) { + TrapWriter trapWriter = textualExtractor.getTrapwriter(); + LocationManager locationManager = textualExtractor.getLocationManager(); + LocationManager scriptLocationManager = + new LocationManager( + locationManager.getSourceFile(), trapWriter, locationManager.getFileLabel()); + scriptLocationManager.setStart(line, column); if (isTypeScript) { if (isEmbedded) { - return null; // Do not extract files from HTML embedded in other files. + return; // Do not extract files from HTML embedded in other files. } Path file = textualExtractor.getExtractedFile().toPath(); FileSnippet snippet = @@ -302,28 +308,36 @@ public class HTMLExtractor implements IExtractor { } state.getSnippets().put(virtualFile, snippet); } - return null; // LoC info is accounted for later + Label topLevelLabel = ASTExtractor.makeTopLevelLabel( + textualExtractor.getTrapwriter(), + scriptLocationManager.getFileLabel(), + scriptLocationManager.getStartLine(), + scriptLocationManager.getStartColumn()); + emitTopLevelXmlNodeBinding(parentHtmlNode, topLevelLabel, context, trapWriter); + // Note: LoC info is accounted for later, so not added here. 
+ return; } - TrapWriter trapwriter = textualExtractor.getTrapwriter(); - LocationManager locationManager = textualExtractor.getLocationManager(); - LocationManager scriptLocationManager = - new LocationManager( - locationManager.getSourceFile(), trapwriter, locationManager.getFileLabel()); - scriptLocationManager.setStart(line, column); JSExtractor extractor = new JSExtractor(config); try { TextualExtractor tx = new TextualExtractor( - trapwriter, + trapWriter, scriptLocationManager, source, config.getExtractLines(), textualExtractor.getMetrics(), textualExtractor.getExtractedFile()); - return extractor.extract(tx, source, toplevelKind, scopeManager).snd(); + Pair<Label, LoCInfo> result = extractor.extract(tx, source, toplevelKind, scopeManager); + emitTopLevelXmlNodeBinding(parentHtmlNode, result.fst(), context, trapWriter); + locInfo.add(result.snd()); } catch (ParseError e) { e.setPosition(scriptLocationManager.translatePosition(e.getPosition())); throw e.asUserError(); } } + + private void emitTopLevelXmlNodeBinding(Segment parentHtmlNode, Label topLevelLabel, HtmlPopulator.Context context, TrapWriter writer) { + Label htmlNodeLabel = context.getNodeLabel(parentHtmlNode); + writer.addTuple("toplevel_parent_xml_node", topLevelLabel, htmlNodeLabel); + } } diff --git a/javascript/ql/src/semmle/javascript/HTML.qll b/javascript/ql/src/semmle/javascript/HTML.qll index 36c7e967f88..d0e17e8ad61 100644 --- a/javascript/ql/src/semmle/javascript/HTML.qll +++ b/javascript/ql/src/semmle/javascript/HTML.qll @@ -88,26 +88,7 @@ module HTML { * Gets the inline script of the given attribute, if any. 
*/ CodeInAttribute getCodeInAttribute(XMLAttribute attribute) { - exists( - string f, Location l1, int sl1, int sc1, int el1, int ec1, Location l2, int sl2, int sc2, - int el2, int ec2 - | - l1 = attribute.getLocation() and - l2 = result.getLocation() and - l1.hasLocationInfo(f, sl1, sc1, el1, ec1) and - l2.hasLocationInfo(f, sl2, sc2, el2, ec2) - | - ( - sl1 = sl2 and sc1 < sc2 - or - sl1 < sl2 - ) and - ( - el1 = el2 and ec1 > ec2 - or - el1 > el2 - ) - ) + toplevel_parent_xml_node(result, attribute) } /** @@ -233,26 +214,7 @@ module HTML { * Gets the inline script of this script element, if any. */ private InlineScript getInlineScript() { - exists( - string f, Location l1, int sl1, int sc1, int el1, int ec1, Location l2, int sl2, int sc2, - int el2, int ec2 - | - l1 = getLocation() and - l2 = result.getLocation() and - l1.hasLocationInfo(f, sl1, sc1, el1, ec1) and - l2.hasLocationInfo(f, sl2, sc2, el2, ec2) - | - ( - sl1 = sl2 and sc1 < sc2 - or - sl1 < sl2 - ) and - ( - el1 = el2 and ec1 > ec2 - or - el1 > el2 - ) - ) and + toplevel_parent_xml_node(result, this) and // the src attribute has precedence not exists(getSourcePath()) } diff --git a/javascript/ql/src/semmlecode.javascript.dbscheme b/javascript/ql/src/semmlecode.javascript.dbscheme index 16cee27e77d..281ca4de940 100644 --- a/javascript/ql/src/semmlecode.javascript.dbscheme +++ b/javascript/ql/src/semmlecode.javascript.dbscheme @@ -131,6 +131,11 @@ is_nodejs (int tl: @toplevel ref); is_es2015_module (int tl: @toplevel ref); is_closure_module (int tl: @toplevel ref); +@xml_node_with_code = @xmlelement | @xmlattribute; +toplevel_parent_xml_node( + unique int toplevel: @toplevel ref, + int xmlnode: @xml_node_with_code ref); + // statements #keyset[parent, idx] stmts (unique int id: @stmt,