JavaScript: Add parser support for E4X.

This commit is contained in:
Max Schaefer
2019-01-29 15:49:37 +00:00
parent 1ad4867f2a
commit f3ea810c21
23 changed files with 1413 additions and 13 deletions

View File

@@ -4,14 +4,17 @@ import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import com.semmle.jcorn.TokenType.Properties;
import com.semmle.jcorn.flow.FlowParser;
import com.semmle.js.ast.ArrayExpression;
import com.semmle.js.ast.AssignmentExpression;
import com.semmle.js.ast.BlockStatement;
import com.semmle.js.ast.CallExpression;
import com.semmle.js.ast.CatchClause;
import com.semmle.js.ast.ClassExpression;
import com.semmle.js.ast.ComprehensionBlock;
import com.semmle.js.ast.ComprehensionExpression;
import com.semmle.js.ast.Decorator;
import com.semmle.js.ast.Expression;
import com.semmle.js.ast.ExpressionStatement;
import com.semmle.js.ast.ForInStatement;
@@ -28,8 +31,14 @@ import com.semmle.js.ast.Node;
import com.semmle.js.ast.Position;
import com.semmle.js.ast.SourceLocation;
import com.semmle.js.ast.Statement;
import com.semmle.js.ast.Token;
import com.semmle.js.ast.TryStatement;
import com.semmle.js.ast.VariableDeclaration;
import com.semmle.js.ast.XMLAnyName;
import com.semmle.js.ast.XMLAttributeSelector;
import com.semmle.js.ast.XMLDotDotExpression;
import com.semmle.js.ast.XMLFilterExpression;
import com.semmle.js.ast.XMLQualifiedIdentifier;
import com.semmle.util.data.Pair;
/**
@@ -155,6 +164,20 @@ public class CustomParser extends FlowParser {
List<Expression> args = this.parseExprList(TokenType.parenR, false, false, null);
CallExpression node = new CallExpression(new SourceLocation(startLoc), name, new ArrayList<>(), args, false, false);
return this.finishNode(node);
} else if (options.e4x() && this.type == at) {
// this could be either a decorator or an attribute selector; we first
// try parsing it as a decorator, and then convert it to an attribute selector
// if the next token turns out not to be `class`
List<Decorator> decorators = parseDecorators();
Expression attr = null;
if (decorators.size() > 1 ||
this.type == TokenType._class ||
((attr = decoratorToAttributeSelector(decorators.get(0))) == null)) {
ClassExpression ce = (ClassExpression) this.parseClass(startLoc, false);
ce.addDecorators(decorators);
return ce;
}
return attr;
} else {
return super.parseExprAtom(refDestructuringErrors);
}
@@ -320,4 +343,170 @@ public class CustomParser extends FlowParser {
}
return res;
}
/*
* E4X
*
* PrimaryExpression :
* PropertyIdentifier
* XMLInitialiser
* XMLListInitialiser
*
* PropertyIdentifier :
* AttributeIdentifier
* QualifiedIdentifier
* WildcardIdentifier
*
* AttributeIdentifier :
* @ PropertySelector
* @ QualifiedIdentifier
* @ [ Expression ]
*
* PropertySelector :
* Identifier
* WildcardIdentifier
*
* QualifiedIdentifier :
* PropertySelector :: PropertySelector
* PropertySelector :: [ Expression ]
*
* WildcardIdentifier :
* *
*
* MemberExpression :
* MemberExpression . PropertyIdentifier
* MemberExpression .. Identifier
* MemberExpression .. PropertyIdentifier
* MemberExpression . ( Expression )
*
* DefaultXMLNamespaceStatement :
* default xml namespace = Expression
*/
// Token type for the E4X descendant-access operator `..`, produced by getTokenFromCode
// when E4X support is enabled. The label handed to Properties is the token's own
// spelling; it used to be ":" (the spelling of the colon token), which mislabels this
// token wherever the label is displayed or compared (presumably in parser diagnostics;
// the label's consumers are not visible here).
protected TokenType doubleDot = new TokenType(new Properties("..").beforeExpr());
/**
 * Recognizes the E4X descendant operator {@code ..} when E4X support is enabled:
 * exactly two consecutive dots that are not followed by a third one. Every other
 * input is tokenized by the superclass.
 */
@Override
protected Token getTokenFromCode(int code) {
    boolean atDescendantOperator = options.e4x()
        && code == '.'
        && charAt(this.pos + 1) == '.'
        && charAt(this.pos + 2) != '.';
    if (!atDescendantOperator) {
        return super.getTokenFromCode(code);
    }
    // skip over both dots before emitting the token
    this.pos += 2;
    return this.finishToken(doubleDot);
}
/**
 * Extends subscript parsing with E4X property, attribute and descendant accesses,
 * as well as filter expressions; anything else is handled by the superclass.
 */
@Override
protected Pair<Expression, Boolean> parseSubscript(Expression base, Position startLoc, boolean noCalls) {
    if (options.e4x() && this.eat(TokenType.dot)) {
        SourceLocation loc = new SourceLocation(startLoc);
        if (!this.eat(TokenType.parenL)) {
            // `base.prop`, where `prop` may be any E4X property identifier
            Expression prop = this.parsePropertyIdentifierOrIdentifier();
            boolean onOptionalChain = isOnOptionalChain(false, base);
            MemberExpression access = new MemberExpression(loc, base, prop, false, false, onOptionalChain);
            return Pair.make(this.finishNode(access), true);
        }
        // `base.(expr)`: a filter expression
        Expression predicate = parseExpression(false, null);
        this.expect(TokenType.parenR);
        XMLFilterExpression filtered = new XMLFilterExpression(loc, base, predicate);
        return Pair.make(this.finishNode(filtered), true);
    }
    if (this.eat(doubleDot)) {
        // `base..prop`: a descendant access
        SourceLocation loc = new SourceLocation(startLoc);
        Expression prop = this.parsePropertyIdentifierOrIdentifier();
        XMLDotDotExpression descendants = new XMLDotDotExpression(loc, base, prop);
        return Pair.make(this.finishNode(descendants), true);
    }
    return super.parseSubscript(base, startLoc, noCalls);
}
/**
 * Parse an attribute identifier, a wildcard identifier, a qualified identifier,
 * or a plain identifier.
 */
protected Expression parsePropertyIdentifierOrIdentifier() {
    // remember where we are before a potential `@` is consumed
    Position attrStart = this.startLoc;
    if (!this.eat(at)) {
        return parsePossiblyQualifiedIdentifier();
    }
    // `@` seen: this is an attribute identifier
    return parseAttributeIdentifier(new SourceLocation(attrStart));
}
/**
 * Parse a wildcard identifier, a qualified identifier, or a plain identifier.
 *
 * <p>A property selector is parsed first; if it is followed by {@code ::}, the
 * result is a qualified identifier whose right-hand side is either a bracketed
 * expression (computed) or another property selector.
 */
protected Expression parsePossiblyQualifiedIdentifier() {
    SourceLocation start = new SourceLocation(startLoc);
    Expression qualifier = parsePropertySelector(start);
    if (!this.eat(doubleColon)) {
        return qualifier;
    }
    boolean computed = this.eat(TokenType.bracketL);
    Expression name;
    if (computed) {
        name = parseExpression(false, null);
        this.expect(TokenType.bracketR);
    } else {
        name = parsePropertySelector(new SourceLocation(startLoc));
    }
    return this.finishNode(new XMLQualifiedIdentifier(start, qualifier, name, computed));
}
/**
 * Parse a property selector, that is, either a wildcard identifier ({@code *})
 * or a plain identifier.
 *
 * @param start the location to give the wildcard node if a {@code *} is parsed
 */
protected Expression parsePropertySelector(SourceLocation start) {
    if (!this.eat(TokenType.star)) {
        return this.parseIdent(true);
    }
    // wildcard identifier
    return this.finishNode(new XMLAnyName(start));
}
/**
 * Parse the part of an attribute identifier that follows the {@code @}: either a
 * computed selector ({@code [ Expr ]}) or a possibly qualified identifier.
 *
 * @param start the location to give the resulting attribute selector node
 */
protected Expression parseAttributeIdentifier(SourceLocation start) {
    boolean computed = this.eat(TokenType.bracketL);
    Expression attribute;
    if (computed) {
        attribute = parseExpression(false, null);
        this.expect(TokenType.bracketR);
    } else {
        attribute = parsePossiblyQualifiedIdentifier();
    }
    return this.finishNode(new XMLAttributeSelector(start, attribute, computed));
}
/**
 * With E4X enabled, a decorator body starting with {@code [} is parsed as a single
 * bracketed expression and wrapped in a one-element array expression, which
 * {@code decoratorToAttributeSelector} later turns into an attribute selector.
 * All other decorator bodies are parsed by the superclass.
 */
@Override
protected Expression parseDecoratorBody() {
    SourceLocation bodyStart = new SourceLocation(startLoc);
    if (!options.e4x() || !this.eat(TokenType.bracketL)) {
        return super.parseDecoratorBody();
    }
    // this must be an attribute selector, so only a single expression followed
    // by a right bracket is allowed
    List<Expression> single = new ArrayList<>();
    single.add(parseExpression(false, null));
    this.expect(TokenType.bracketR);
    return this.finishNode(new ArrayExpression(bodyStart, single));
}
/**
 * Convert a decorator that resulted from mis-parsing an attribute selector into
 * an attribute selector.
 *
 * @return a non-computed selector if the decorator expression is an identifier, a
 *     computed selector if it is a one-element array expression, and {@code null}
 *     in every other case
 */
protected XMLAttributeSelector decoratorToAttributeSelector(Decorator d) {
    Expression body = d.getExpression();
    if (body instanceof Identifier) {
        return new XMLAttributeSelector(d.getLoc(), body, false);
    }
    if (!(body instanceof ArrayExpression)) {
        return null;
    }
    ArrayExpression bracketed = (ArrayExpression) body;
    if (bracketed.getElements().size() != 1) {
        return null;
    }
    return new XMLAttributeSelector(d.getLoc(), bracketed.getElements().get(0), true);
}
}

View File

@@ -32,7 +32,7 @@ public class Options {
}
private boolean allowHashBang, allowReturnOutsideFunction, allowImportExportEverywhere;
private boolean preserveParens, mozExtensions, jscript, esnext, v8Extensions;
private boolean preserveParens, mozExtensions, jscript, esnext, v8Extensions, e4x;
private int ecmaVersion;
private AllowReserved allowReserved;
private String sourceType;
@@ -59,6 +59,7 @@ public class Options {
this.jscript = false;
this.esnext = false;
this.v8Extensions = false;
this.e4x = false;
this.onRecoverableError = null;
}
@@ -71,6 +72,7 @@ public class Options {
this.jscript = that.jscript;
this.esnext = that.esnext;
this.v8Extensions = that.v8Extensions;
this.e4x = that.e4x;
this.ecmaVersion = that.ecmaVersion;
this.allowReserved = that.allowReserved;
this.sourceType = that.sourceType;
@@ -114,6 +116,10 @@ public class Options {
return v8Extensions;
}
/** Returns the current value of the E4X option. */
public boolean e4x() {
    return this.e4x;
}
public Identifiers.Dialect getDialect() {
switch (ecmaVersion) {
case 3:
@@ -183,6 +189,10 @@ public class Options {
this.v8Extensions = v8Extensions;
}
/** Sets the E4X option to the given value. */
public void e4x(boolean e4x) {
this.e4x = e4x;
}
public Options preserveParens(boolean preserveParens) {
this.preserveParens = preserveParens;
return this;

View File

@@ -1478,7 +1478,7 @@ public class Parser {
}
}
private boolean isOnOptionalChain(boolean optional, Expression base) {
protected boolean isOnOptionalChain(boolean optional, Expression base) {
return optional || base instanceof Chainable && ((Chainable)base).isOnOptionalChain();
}

View File

@@ -734,4 +734,29 @@ public class DefaultVisitor<C, R> implements Visitor<C, R> {
public R visit(RestTypeExpr nd, C c) {
return visit((TypeExpression) nd, c);
}
/** By default, an {@link XMLAnyName} is handled like any other expression. */
@Override
public R visit(XMLAnyName nd, C c) {
    Expression asExpression = (Expression) nd;
    return visit(asExpression, c);
}
/** By default, an {@link XMLAttributeSelector} is handled like any other expression. */
@Override
public R visit(XMLAttributeSelector nd, C c) {
    Expression asExpression = (Expression) nd;
    return visit(asExpression, c);
}
/** By default, an {@link XMLFilterExpression} is handled like any other expression. */
@Override
public R visit(XMLFilterExpression nd, C c) {
    Expression asExpression = (Expression) nd;
    return visit(asExpression, c);
}
/** By default, an {@link XMLQualifiedIdentifier} is handled like any other expression. */
@Override
public R visit(XMLQualifiedIdentifier nd, C c) {
    Expression asExpression = (Expression) nd;
    return visit(asExpression, c);
}
/** By default, an {@link XMLDotDotExpression} is handled like any other expression. */
@Override
public R visit(XMLDotDotExpression nd, C c) {
    Expression asExpression = (Expression) nd;
    return visit(asExpression, c);
}
}

View File

@@ -18,7 +18,7 @@ public abstract class MemberDefinition<V extends Expression> extends Node {
/**
* The name of the member.
*
* If {@link #isComputed} is false, this must be an {@link Identifier}, otherwise
* If {@link #isComputed()} is false, this must be an {@link Identifier}, otherwise
* it can be an arbitrary expression.
*/
private final Expression key;

View File

@@ -704,4 +704,29 @@ public class NodeCopier implements Visitor<Void, INode> {
public INode visit(RestTypeExpr nd, Void c) {
return new RestTypeExpr(visit(nd.getLoc()), copy(nd.getArrayType()));
}
// Builds a new XMLAnyName whose location is the result of visiting the original location.
@Override
public INode visit(XMLAnyName nd, Void c) {
return new XMLAnyName(visit(nd.getLoc()));
}
// Builds a new XMLAttributeSelector from the visited location and the result of
// copy() on the attribute expression; the computed flag is carried over unchanged.
@Override
public INode visit(XMLAttributeSelector nd, Void c) {
return new XMLAttributeSelector(visit(nd.getLoc()), copy(nd.getAttribute()), nd.isComputed());
}
// Builds a new XMLFilterExpression from the visited location and the results of
// copy() on the left and right operands.
@Override
public INode visit(XMLFilterExpression nd, Void c) {
return new XMLFilterExpression(visit(nd.getLoc()), copy(nd.getLeft()), copy(nd.getRight()));
}
// Builds a new XMLQualifiedIdentifier from the visited location and the results of
// copy() on both operands; the computed flag is carried over unchanged.
@Override
public INode visit(XMLQualifiedIdentifier nd, Void c) {
return new XMLQualifiedIdentifier(visit(nd.getLoc()), copy(nd.getLeft()), copy(nd.getRight()), nd.isComputed());
}
// Builds a new XMLDotDotExpression from the visited location and the results of
// copy() on the left and right operands.
@Override
public INode visit(XMLDotDotExpression nd, Void c) {
return new XMLDotDotExpression(visit(nd.getLoc()), copy(nd.getLeft()), copy(nd.getRight()));
}
}

View File

@@ -174,4 +174,9 @@ public interface Visitor<C, R> {
// type-expression node kinds
public R visit(ImportTypeExpr nd, C c);
public R visit(OptionalTypeExpr nd, C c);
public R visit(RestTypeExpr nd, C c);
// E4X node kinds: wildcard names, attribute selectors, filter expressions,
// qualified identifiers and descendant (`..`) accesses
public R visit(XMLAnyName nd, C c);
public R visit(XMLAttributeSelector nd, C c);
public R visit(XMLFilterExpression nd, C c);
public R visit(XMLQualifiedIdentifier nd, C c);
public R visit(XMLDotDotExpression nd, C c);
}

View File

@@ -0,0 +1,13 @@
package com.semmle.js.ast;
/**
 * AST node with type name "XMLAnyName" and no children; the parser creates it for
 * the E4X wildcard identifier {@code *}.
 */
public class XMLAnyName extends Expression {
public XMLAnyName(SourceLocation loc) {
super("XMLAnyName", loc);
}
// double dispatch to the visitor overload for this node type
@Override
public <C, R> R accept(Visitor<C, R> v, C c) {
return v.visit(this, c);
}
}

View File

@@ -0,0 +1,26 @@
package com.semmle.js.ast;
/**
 * AST node with type name "XMLAttributeSelector", holding the selected attribute
 * expression and a flag telling whether the selector is computed (the parser sets
 * it for the bracketed form {@code @[ Expr ]}).
 */
public class XMLAttributeSelector extends Expression {
    final Expression attribute;
    final boolean computed;

    public XMLAttributeSelector(SourceLocation loc, Expression attribute, boolean computed) {
        super("XMLAttributeSelector", loc);
        this.attribute = attribute;
        this.computed = computed;
    }

    /** The expression selecting the attribute. */
    public Expression getAttribute() {
        return this.attribute;
    }

    /** Whether this selector was constructed as computed. */
    public boolean isComputed() {
        return this.computed;
    }

    /** Double dispatch to the visitor overload for this node type. */
    @Override
    public <C, R> R accept(Visitor<C, R> v, C c) {
        return v.visit(this, c);
    }
}

View File

@@ -0,0 +1,25 @@
package com.semmle.js.ast;
/**
 * AST node with type name "XMLDotDotExpression"; the parser creates it for the E4X
 * descendant access {@code left..right}.
 */
public class XMLDotDotExpression extends Expression {
    final Expression left;
    final Expression right;

    public XMLDotDotExpression(SourceLocation loc, Expression left, Expression right) {
        super("XMLDotDotExpression", loc);
        this.left = left;
        this.right = right;
    }

    /** The operand before the {@code ..}. */
    public Expression getLeft() {
        return this.left;
    }

    /** The operand after the {@code ..}. */
    public Expression getRight() {
        return this.right;
    }

    /** Double dispatch to the visitor overload for this node type. */
    @Override
    public <C, R> R accept(Visitor<C, R> v, C c) {
        return v.visit(this, c);
    }
}

View File

@@ -0,0 +1,25 @@
package com.semmle.js.ast;
/**
 * AST node with type name "XMLFilterExpression"; the parser creates it for the E4X
 * filter form {@code left.(right)}.
 */
public class XMLFilterExpression extends Expression {
    final Expression left;
    final Expression right;

    public XMLFilterExpression(SourceLocation loc, Expression left, Expression right) {
        super("XMLFilterExpression", loc);
        this.left = left;
        this.right = right;
    }

    /** The expression being filtered. */
    public Expression getLeft() {
        return this.left;
    }

    /** The parenthesized filter expression. */
    public Expression getRight() {
        return this.right;
    }

    /** Double dispatch to the visitor overload for this node type. */
    @Override
    public <C, R> R accept(Visitor<C, R> v, C c) {
        return v.visit(this, c);
    }
}

View File

@@ -0,0 +1,31 @@
package com.semmle.js.ast;
/**
 * AST node with type name "XMLQualifiedIdentifier"; the parser creates it for the
 * E4X forms {@code left::right} (not computed) and {@code left::[right]} (computed).
 */
public class XMLQualifiedIdentifier extends Expression {
    final Expression left;
    final Expression right;
    final boolean computed;

    public XMLQualifiedIdentifier(SourceLocation loc, Expression left, Expression right, boolean computed) {
        super("XMLQualifiedIdentifier", loc);
        this.left = left;
        this.right = right;
        this.computed = computed;
    }

    /** The operand before the {@code ::}. */
    public Expression getLeft() {
        return this.left;
    }

    /** The operand after the {@code ::}. */
    public Expression getRight() {
        return this.right;
    }

    /** Whether this identifier was constructed as computed. */
    public boolean isComputed() {
        return this.computed;
    }

    /** Double dispatch to the visitor overload for this node type. */
    @Override
    public <C, R> R accept(Visitor<C, R> v, C c) {
        return v.visit(this, c);
    }
}

View File

@@ -90,6 +90,10 @@ import com.semmle.js.ast.VariableDeclaration;
import com.semmle.js.ast.VariableDeclarator;
import com.semmle.js.ast.WhileStatement;
import com.semmle.js.ast.WithStatement;
import com.semmle.js.ast.XMLAttributeSelector;
import com.semmle.js.ast.XMLDotDotExpression;
import com.semmle.js.ast.XMLFilterExpression;
import com.semmle.js.ast.XMLQualifiedIdentifier;
import com.semmle.js.ast.YieldExpression;
import com.semmle.js.ast.jsx.IJSXName;
import com.semmle.js.ast.jsx.JSXAttribute;
@@ -1878,6 +1882,37 @@ public class ASTExtractor {
visit(nd.getArrayType(), key, 0, IdContext.typeBind);
return key;
}
/**
 * Obtains the label for the selector from the superclass visitor, then visits the
 * attribute expression at index 0 in the {@code IdContext.label} context.
 */
@Override
public Label visit(XMLAttributeSelector nd, Context c) {
    Label selectorKey = super.visit(nd, c);
    visit(nd.getAttribute(), selectorKey, 0, IdContext.label);
    return selectorKey;
}
/**
 * Obtains the label for the filter expression from the superclass visitor, then
 * visits the left operand at index 0 and the right operand at index 1.
 */
@Override
public Label visit(XMLFilterExpression nd, Context c) {
    Label filterKey = super.visit(nd, c);
    visit(nd.getLeft(), filterKey, 0);
    visit(nd.getRight(), filterKey, 1);
    return filterKey;
}
/**
 * Obtains the label for the qualified identifier from the superclass visitor, then
 * visits the left operand at index 0 and the right operand at index 1. The right
 * operand is visited in the {@code IdContext.varBind} context when the identifier
 * is computed and in the {@code IdContext.label} context otherwise.
 */
@Override
public Label visit(XMLQualifiedIdentifier nd, Context c) {
    Label qualifiedKey = super.visit(nd, c);
    visit(nd.getLeft(), qualifiedKey, 0);
    if (nd.isComputed()) {
        visit(nd.getRight(), qualifiedKey, 1, IdContext.varBind);
    } else {
        visit(nd.getRight(), qualifiedKey, 1, IdContext.label);
    }
    return qualifiedKey;
}
/**
 * Obtains the label for the descendant access from the superclass visitor, then
 * visits the left operand at index 0 and the right operand at index 1, the latter
 * in the {@code IdContext.label} context.
 */
@Override
public Label visit(XMLDotDotExpression nd, Context c) {
    Label dotDotKey = super.visit(nd, c);
    visit(nd.getLeft(), dotDotKey, 0);
    visit(nd.getRight(), dotDotKey, 1, IdContext.label);
    return dotDotKey;
}
}
public void extract(Node root, Platform platform, SourceType sourceType, int toplevelKind) {

View File

@@ -91,6 +91,11 @@ import com.semmle.js.ast.VariableDeclarator;
import com.semmle.js.ast.Visitor;
import com.semmle.js.ast.WhileStatement;
import com.semmle.js.ast.WithStatement;
import com.semmle.js.ast.XMLAnyName;
import com.semmle.js.ast.XMLAttributeSelector;
import com.semmle.js.ast.XMLDotDotExpression;
import com.semmle.js.ast.XMLFilterExpression;
import com.semmle.js.ast.XMLQualifiedIdentifier;
import com.semmle.js.ast.YieldExpression;
import com.semmle.js.ast.jsx.IJSXName;
import com.semmle.js.ast.jsx.JSXAttribute;
@@ -508,6 +513,26 @@ public class CFGExtractor {
return nd.getRight().accept(this, null);
}
// Delegates to the attribute expression: the result is whatever this visitor
// returns for nd.getAttribute().
@Override
public Node visit(XMLAttributeSelector nd, Void c) {
return nd.getAttribute().accept(this, c);
}
// Delegates to the left operand: the result is whatever this visitor returns
// for nd.getLeft().
@Override
public Node visit(XMLFilterExpression nd, Void c) {
return nd.getLeft().accept(this, c);
}
// Delegates to the left operand: the result is whatever this visitor returns
// for nd.getLeft().
@Override
public Node visit(XMLQualifiedIdentifier nd, Void c) {
return nd.getLeft().accept(this, c);
}
// Delegates to the left operand: the result is whatever this visitor returns
// for nd.getLeft().
@Override
public Node visit(XMLDotDotExpression nd, Void c) {
return nd.getLeft().accept(this, c);
}
/**
 * Runs a fresh {@code First} visitor over {@code nd} (with a {@code null} context)
 * and returns its result.
 */
public static Node of(Node nd) {
    First firstVisitor = new First();
    return nd.accept(firstVisitor, null);
}
@@ -1965,6 +1990,40 @@ public class CFGExtractor {
succ(nd, c.getAllSuccessors());
return null;
}
// A wildcard name has no sub-expressions to sequence: it is passed to succ() together
// with all successors from c (presumably recording them as its successors; succ is
// defined outside this view).
@Override
public Void visit(XMLAnyName nd, SuccessorInfo c) {
succ(nd, c.getAllSuccessors());
return null;
}
// Passes the attribute expression and then the selector itself to seq(), and afterwards
// the selector with all successors from c to succ(). Presumably this orders the attribute
// before the selector in the flow graph; seq and succ are defined outside this view.
@Override
public Void visit(XMLAttributeSelector nd, SuccessorInfo c) {
seq(nd.getAttribute(), nd);
succ(nd, c.getAllSuccessors());
return null;
}
// Passes the left operand, the right operand and then the filter expression itself to
// seq(), and afterwards the node with all successors from c to succ(). Presumably this
// orders left, right, node in the flow graph; seq and succ are defined outside this view.
@Override
public Void visit(XMLFilterExpression nd, SuccessorInfo c) {
seq(nd.getLeft(), nd.getRight(), nd);
succ(nd, c.getAllSuccessors());
return null;
}
// Passes the left operand, the right operand and then the qualified identifier itself to
// seq(), and afterwards the node with all successors from c to succ(). Presumably this
// orders left, right, node in the flow graph; seq and succ are defined outside this view.
@Override
public Void visit(XMLQualifiedIdentifier nd, SuccessorInfo c) {
seq(nd.getLeft(), nd.getRight(), nd);
succ(nd, c.getAllSuccessors());
return null;
}
// Passes the left operand, the right operand and then the descendant access itself to
// seq(), and afterwards the node with all successors from c to succ(). Presumably this
// orders left, right, node in the flow graph; seq and succ are defined outside this view.
@Override
public Void visit(XMLDotDotExpression nd, SuccessorInfo c) {
seq(nd.getLeft(), nd.getRight(), nd);
succ(nd, c.getAllSuccessors());
return null;
}
}
public void extract(Node nd) {

View File

@@ -20,6 +20,11 @@ import com.semmle.js.ast.MemberExpression;
import com.semmle.js.ast.MetaProperty;
import com.semmle.js.ast.UnaryExpression;
import com.semmle.js.ast.UpdateExpression;
import com.semmle.js.ast.XMLAnyName;
import com.semmle.js.ast.XMLAttributeSelector;
import com.semmle.js.ast.XMLDotDotExpression;
import com.semmle.js.ast.XMLFilterExpression;
import com.semmle.js.ast.XMLQualifiedIdentifier;
import com.semmle.js.ast.jsx.JSXIdentifier;
import com.semmle.js.ast.jsx.JSXMemberExpression;
import com.semmle.js.ast.jsx.JSXSpreadAttribute;
@@ -266,6 +271,31 @@ public class ExprKinds {
public Integer visit(DecoratorList nd, Void c) {
return 104;
}
// Expression kind code for XMLAnyName nodes. NOTE(review): presumably has to agree with
// a kind numbering defined outside this file -- confirm before changing.
@Override
public Integer visit(XMLAnyName nd, Void c) {
return 108;
}
/** Expression kind code for attribute selectors: 110 when computed, 109 otherwise. */
@Override
public Integer visit(XMLAttributeSelector nd, Void c) {
    if (nd.isComputed()) {
        return 110;
    }
    return 109;
}
// Expression kind code for XMLFilterExpression nodes. NOTE(review): presumably has to
// agree with a kind numbering defined outside this file -- confirm before changing.
@Override
public Integer visit(XMLFilterExpression nd, Void c) {
return 111;
}
/** Expression kind code for qualified identifiers: 113 when computed, 112 otherwise. */
@Override
public Integer visit(XMLQualifiedIdentifier nd, Void c) {
    if (nd.isComputed()) {
        return 113;
    }
    return 112;
}
// Expression kind code for XMLDotDotExpression nodes. NOTE(review): presumably has to
// agree with a kind numbering defined outside this file -- confirm before changing.
@Override
public Integer visit(XMLDotDotExpression nd, Void c) {
return 114;
}
}, null);
if (kind == null)
throw new CatastrophicError("Unsupported expression kind: " + expr.getClass());

View File

@@ -207,6 +207,9 @@ public class ExtractorConfig {
/** Should v8-specific language extensions be supported? */
private boolean v8Extensions;
/** Should E4X syntax be supported? */
private boolean e4x;
/** Should parse errors be reported as violations instead of aborting extraction? */
private boolean tolerateParseErrors;
@@ -248,6 +251,7 @@ public class ExtractorConfig {
this.v8Extensions = true;
}
this.typescriptMode = TypeScriptMode.NONE;
this.e4x = experimental;
this.defaultEncoding = StandardCharsets.UTF_8.name();
}
@@ -260,6 +264,7 @@ public class ExtractorConfig {
this.jsx = that.jsx;
this.esnext = that.esnext;
this.v8Extensions = that.v8Extensions;
this.e4x = that.e4x;
this.tolerateParseErrors = that.tolerateParseErrors;
this.fileType = that.fileType;
this.sourceType = that.sourceType;
@@ -340,6 +345,16 @@ public class ExtractorConfig {
return res;
}
/** Should E4X syntax be supported? */
public boolean isE4X() {
    return this.e4x;
}
/**
 * Returns a new configuration created from this one via the copy constructor, with
 * its E4X flag set to the given value; this configuration is not modified.
 */
public ExtractorConfig withE4X(boolean e4x) {
    ExtractorConfig updated = new ExtractorConfig(this);
    updated.e4x = e4x;
    return updated;
}
/** Should parse errors be reported as violations instead of aborting extraction? */
public boolean isTolerateParseErrors() {
    return this.tolerateParseErrors;
}
@@ -440,14 +455,11 @@ public class ExtractorConfig {
@Override
public String toString() {
return "ExtractorConfig [ecmaVersion=" + ecmaVersion + ", externs="
+ externs + ", platform=" + platform + ", mozExtensions="
+ mozExtensions + ", jscript=" + jscript + ", jsx=" + jsx
+ ", esnext=" + esnext + ", v8Extensions=" + v8Extensions
+ ", tolerateParseErrors=" + tolerateParseErrors
+ ", htmlHandling=" + htmlHandling + ", fileType=" + fileType
+ ", sourceType=" + sourceType + ", extractLines="
+ extractLines + ", typescript=" + typescriptMode
+ ", defaultEncoding=" + defaultEncoding + "]";
return "ExtractorConfig [ecmaVersion=" + ecmaVersion + ", externs=" + externs + ", platform=" + platform
+ ", mozExtensions=" + mozExtensions + ", jscript=" + jscript + ", jsx=" + jsx + ", esnext=" + esnext
+ ", v8Extensions=" + v8Extensions + ", e4x=" + e4x + ", tolerateParseErrors=" + tolerateParseErrors
+ ", htmlHandling=" + htmlHandling + ", fileType=" + fileType + ", sourceType=" + sourceType
+ ", extractLines=" + extractLines + ", typescriptMode=" + typescriptMode + ", defaultEncoding="
+ defaultEncoding + "]";
}
}

View File

@@ -332,7 +332,7 @@ public class Main {
argsParser.addFlag(P_EXPERIMENTAL, 0, "Enable experimental support for pending ECMAScript proposals "
+ "(public class fields, function.sent, decorators, export extensions, function bind, "
+ "parameter-less catch, dynamic import, numeric separators, bigints), "
+ "as well as other language extensions (JScript, Mozilla and v8-specific extensions) and full HTML extraction.");
+ "as well as other language extensions (E4X, JScript, Mozilla and v8-specific extensions) and full HTML extraction.");
argsParser.addFlag(P_EXTERNS, 0, "Extract the given JavaScript files as Closure-style externs.");
argsParser.addFlag(P_EXTRACT_PROGRAM_TEXT, 0, "Extract a representation of the textual content of the program "
+ "(in addition to its syntactic structure).");

View File

@@ -37,6 +37,8 @@ public class JcornWrapper {
options.esnext(true);
if (config.isV8Extensions())
options.v8Extensions(true);
if (config.isE4X())
options.e4x(true);
Program program = null;
List<ParseError> errors = new ArrayList<>();