JS: Extract mapping from TopLevel to parent HTML node

This commit is contained in:
Asger Feldthaus
2020-12-14 11:59:08 +00:00
parent 8848ee2d10
commit 3b666a5646
4 changed files with 102 additions and 113 deletions

View File

@@ -177,14 +177,22 @@ public class ASTExtractor {
this.scopeManager = scopeManager;
this.lexicalExtractor = lexicalExtractor;
this.regexpExtractor = new RegExpExtractor(trapwriter, locationManager);
this.toplevelLabel =
trapwriter.globalID(
this.toplevelLabel = makeTopLevelLabel(trapwriter, locationManager.getFileLabel(), locationManager.getStartLine(), locationManager.getStartColumn());
}
/**
* Returns the label for the top-level starting at the given location.
* <p>
* May be used to refer to the top-level before it has been extracted.
*/
public static Label makeTopLevelLabel(TrapWriter trapWriter, Label fileLabel, int startLine, int startColumn) {
return trapWriter.globalID(
"script;{"
+ locationManager.getFileLabel()
+ fileLabel
+ "},"
+ locationManager.getStartLine()
+ startLine
+ ','
+ locationManager.getStartColumn());
+ startColumn);
}
public TrapWriter getTrapwriter() {

View File

@@ -10,9 +10,11 @@ import com.semmle.js.extractor.ExtractorConfig.Platform;
import com.semmle.js.extractor.ExtractorConfig.SourceType;
import com.semmle.js.parser.ParseError;
import com.semmle.util.data.Option;
import com.semmle.util.data.Pair;
import com.semmle.util.data.StringUtil;
import com.semmle.util.io.WholeIO;
import com.semmle.util.trap.TrapWriter;
import com.semmle.util.trap.TrapWriter.Label;
import net.htmlparser.jericho.Attribute;
import net.htmlparser.jericho.Attributes;
@@ -23,16 +25,15 @@ import net.htmlparser.jericho.Segment;
/** Extractor for handling HTML and XHTML files. */
public class HTMLExtractor implements IExtractor {
private LoCInfo locInfo = new LoCInfo(0, 0);
private class JavaScriptHTMLElementHandler implements HtmlPopulator.ElementHandler {
private final ScopeManager scopeManager;
private final TextualExtractor textualExtractor;
private LoCInfo locInfo;
public JavaScriptHTMLElementHandler(TextualExtractor textualExtractor) {
this.textualExtractor = textualExtractor;
this.locInfo = new LoCInfo(0, 0);
this.scopeManager =
new ScopeManager(textualExtractor.getTrapwriter(), config.getEcmaVersion());
}
@@ -42,8 +43,7 @@ public class HTMLExtractor implements IExtractor {
* attribute values.
*/
@Override
public void handleElement(Element elt) {
LoCInfo snippetLoC = null;
public void handleElement(Element elt, HtmlPopulator.Context context) {
if (elt.getName().equals(HTMLElementName.SCRIPT)) {
SourceType sourceType = getScriptSourceType(elt, textualExtractor.getExtractedFile());
if (sourceType != null) {
@@ -72,7 +72,6 @@ public class HTMLExtractor implements IExtractor {
source = source.replace("<![CDATA[", " ").replace("]]>", " ");
if (!source.trim().isEmpty()) {
RowColumnVector contentStart = content.getRowColumnVector();
snippetLoC =
extractSnippet(
TopLevelKind.inlineScript,
config.withSourceType(sourceType),
@@ -81,7 +80,9 @@ public class HTMLExtractor implements IExtractor {
source,
contentStart.getRow(),
contentStart.getColumn(),
isTypeScript);
isTypeScript,
elt,
context);
}
}
} else {
@@ -95,7 +96,6 @@ public class HTMLExtractor implements IExtractor {
String source = attr.getValue();
RowColumnVector valueStart = attr.getValueSegment().getRowColumnVector();
if (JS_ATTRIBUTE.matcher(attr.getName()).matches()) {
snippetLoC =
extractSnippet(
TopLevelKind.eventHandler,
config,
@@ -104,7 +104,9 @@ public class HTMLExtractor implements IExtractor {
source,
valueStart.getRow(),
valueStart.getColumn(),
false /* isTypeScript */);
false /* isTypeScript */,
attr,
context);
} else if (isAngularTemplateAttributeName(attr.getName())) {
// For an attribute *ngFor="let var of EXPR", start parsing at EXPR
int offset = 0;
@@ -116,7 +118,6 @@ public class HTMLExtractor implements IExtractor {
source = expr;
}
}
snippetLoC =
extractSnippet(
TopLevelKind.eventHandler,
config.withSourceType(SourceType.ANGULAR_TEMPLATE),
@@ -125,10 +126,11 @@ public class HTMLExtractor implements IExtractor {
source,
valueStart.getRow(),
valueStart.getColumn() + offset,
false /* isTypeScript */);
false /* isTypeScript */,
attr,
context);
} else if (source.startsWith("javascript:")) {
source = source.substring(11);
snippetLoC =
extractSnippet(
TopLevelKind.javascriptUrl,
config,
@@ -137,16 +139,12 @@ public class HTMLExtractor implements IExtractor {
source,
valueStart.getRow(),
valueStart.getColumn() + 11,
false /* isTypeScript */);
false /* isTypeScript */,
attr,
context);
}
}
}
if (snippetLoC != null) locInfo.add(snippetLoC);
}
public LoCInfo getLoCInfo() {
return this.locInfo;
}
}
@@ -202,7 +200,7 @@ public class HTMLExtractor implements IExtractor {
extractor.doit(Option.some(eltHandler));
return eltHandler.getLoCInfo();
return locInfo;
}
/**
@@ -270,7 +268,7 @@ public class HTMLExtractor implements IExtractor {
return val == null ? val : StringUtil.lc(val);
}
private LoCInfo extractSnippet(
private void extractSnippet(
TopLevelKind toplevelKind,
ExtractorConfig config,
ScopeManager scopeManager,
@@ -278,10 +276,18 @@ public class HTMLExtractor implements IExtractor {
String source,
int line,
int column,
boolean isTypeScript) {
boolean isTypeScript,
Segment parentHtmlNode,
HtmlPopulator.Context context) {
TrapWriter trapWriter = textualExtractor.getTrapwriter();
LocationManager locationManager = textualExtractor.getLocationManager();
LocationManager scriptLocationManager =
new LocationManager(
locationManager.getSourceFile(), trapWriter, locationManager.getFileLabel());
scriptLocationManager.setStart(line, column);
if (isTypeScript) {
if (isEmbedded) {
return null; // Do not extract files from HTML embedded in other files.
return; // Do not extract files from HTML embedded in other files.
}
Path file = textualExtractor.getExtractedFile().toPath();
FileSnippet snippet =
@@ -302,28 +308,36 @@ public class HTMLExtractor implements IExtractor {
}
state.getSnippets().put(virtualFile, snippet);
}
return null; // LoC info is accounted for later
Label topLevelLabel = ASTExtractor.makeTopLevelLabel(
textualExtractor.getTrapwriter(),
scriptLocationManager.getFileLabel(),
scriptLocationManager.getStartLine(),
scriptLocationManager.getStartColumn());
emitTopLevelXmlNodeBinding(parentHtmlNode, topLevelLabel, context, trapWriter);
// Note: LoC info is accounted for later, so not added here.
return;
}
TrapWriter trapwriter = textualExtractor.getTrapwriter();
LocationManager locationManager = textualExtractor.getLocationManager();
LocationManager scriptLocationManager =
new LocationManager(
locationManager.getSourceFile(), trapwriter, locationManager.getFileLabel());
scriptLocationManager.setStart(line, column);
JSExtractor extractor = new JSExtractor(config);
try {
TextualExtractor tx =
new TextualExtractor(
trapwriter,
trapWriter,
scriptLocationManager,
source,
config.getExtractLines(),
textualExtractor.getMetrics(),
textualExtractor.getExtractedFile());
return extractor.extract(tx, source, toplevelKind, scopeManager).snd();
Pair<Label, LoCInfo> result = extractor.extract(tx, source, toplevelKind, scopeManager);
emitTopLevelXmlNodeBinding(parentHtmlNode, result.fst(), context, trapWriter);
locInfo.add(result.snd());
} catch (ParseError e) {
e.setPosition(scriptLocationManager.translatePosition(e.getPosition()));
throw e.asUserError();
}
}
/**
 * Records which HTML node a top-level belongs to.
 *
 * <p>Looks up the label of {@code parentHtmlNode} through {@code context} and writes a
 * {@code toplevel_parent_xml_node} tuple pairing {@code topLevelLabel} with that label.
 *
 * @param parentHtmlNode the HTML segment (callers pass a script element or an attribute)
 *     containing the snippet
 * @param topLevelLabel the label of the top-level extracted (or to be extracted) from the snippet
 * @param context the populator context used to resolve the label of {@code parentHtmlNode}
 * @param writer the trap writer that receives the tuple
 */
private void emitTopLevelXmlNodeBinding(
    Segment parentHtmlNode,
    Label topLevelLabel,
    HtmlPopulator.Context context,
    TrapWriter writer) {
  writer.addTuple(
      "toplevel_parent_xml_node", topLevelLabel, context.getNodeLabel(parentHtmlNode));
}
}

View File

@@ -88,26 +88,7 @@ module HTML {
* Gets the inline script of the given attribute, if any.
*/
CodeInAttribute getCodeInAttribute(XMLAttribute attribute) {
exists(
string f, Location l1, int sl1, int sc1, int el1, int ec1, Location l2, int sl2, int sc2,
int el2, int ec2
|
l1 = attribute.getLocation() and
l2 = result.getLocation() and
l1.hasLocationInfo(f, sl1, sc1, el1, ec1) and
l2.hasLocationInfo(f, sl2, sc2, el2, ec2)
|
(
sl1 = sl2 and sc1 < sc2
or
sl1 < sl2
) and
(
el1 = el2 and ec1 > ec2
or
el1 > el2
)
)
toplevel_parent_xml_node(result, attribute)
}
/**
@@ -233,26 +214,7 @@ module HTML {
* Gets the inline script of this script element, if any.
*/
private InlineScript getInlineScript() {
exists(
string f, Location l1, int sl1, int sc1, int el1, int ec1, Location l2, int sl2, int sc2,
int el2, int ec2
|
l1 = getLocation() and
l2 = result.getLocation() and
l1.hasLocationInfo(f, sl1, sc1, el1, ec1) and
l2.hasLocationInfo(f, sl2, sc2, el2, ec2)
|
(
sl1 = sl2 and sc1 < sc2
or
sl1 < sl2
) and
(
el1 = el2 and ec1 > ec2
or
el1 > el2
)
) and
toplevel_parent_xml_node(result, this) and
// the src attribute has precedence
not exists(getSourcePath())
}

View File

@@ -131,6 +131,11 @@ is_nodejs (int tl: @toplevel ref);
is_es2015_module (int tl: @toplevel ref);
is_closure_module (int tl: @toplevel ref);
// XML/HTML node kinds that can contain embedded code: elements and attributes.
@xml_node_with_code = @xmlelement | @xmlattribute;
// Maps a top-level to the XML/HTML node it was extracted from.
// The HTML extractor emits one tuple per snippet, passing the enclosing
// script element or attribute as the node.
toplevel_parent_xml_node(
unique int toplevel: @toplevel ref,
int xmlnode: @xml_node_with_code ref);
// statements
#keyset[parent, idx]
stmts (unique int id: @stmt,