From 9391d366998ef3e29f2e56523549c76efd0f93b3 Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Mon, 6 Jan 2020 09:24:46 +0000 Subject: [PATCH] JavaScript: Teach extractor to tolerate assignment patterns in AST. Our database representation of ASTs does not use assignment patterns, instead encoding the relevant information directly in the associated function/loop/assignment. We convert from an AST with assignment patterns to one without during parsing, so the extractor does not expect any assignment patterns to be present in the AST. Due to a bug in the parser, this can currently happen for malformed programs. While we should fix that bug once it gets fixed in Acorn, it also makes sense for the extractor to be more robust, so this PR teaches the `ASTExtractor` pass to raise a parse error when it encounters an assignment pattern, and all other passes to simply ignore them. --- .../src/com/semmle/js/ast/DefaultVisitor.java | 7 +- .../com/semmle/js/extractor/ASTExtractor.java | 26 ++-- .../com/semmle/js/extractor/JSExtractor.java | 2 +- .../src/com/semmle/js/extractor/Main.java | 2 +- .../input/invalid-assignment-pattern.js | 1 + .../trap/invalid-assignment-pattern.js.trap | 117 ++++++++++++++++++ 6 files changed, 144 insertions(+), 11 deletions(-) create mode 100644 javascript/extractor/tests/errors/input/invalid-assignment-pattern.js create mode 100644 javascript/extractor/tests/errors/output/trap/invalid-assignment-pattern.js.trap diff --git a/javascript/extractor/src/com/semmle/js/ast/DefaultVisitor.java b/javascript/extractor/src/com/semmle/js/ast/DefaultVisitor.java index cb11b9376ea..9e08063d361 100644 --- a/javascript/extractor/src/com/semmle/js/ast/DefaultVisitor.java +++ b/javascript/extractor/src/com/semmle/js/ast/DefaultVisitor.java @@ -97,8 +97,11 @@ public class DefaultVisitor implements Visitor { } @Override - public R visit(AssignmentPattern nd, C q) { - throw new CatastrophicError("Assignment patterns should not appear in the AST."); + public R visit(AssignmentPattern nd, C c) { + // assignment patterns should not appear in the AST, but can do for malformed + // programs; the ASTExtractor raises a ParseError in this case, other visitors + // should just ignore them + return visit(nd.getLeft(), c); } @Override diff --git a/javascript/extractor/src/com/semmle/js/extractor/ASTExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/ASTExtractor.java index c2a1126070d..4bc7d0fbdd3 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/ASTExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/ASTExtractor.java @@ -1,5 +1,11 @@ package com.semmle.js.extractor; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Set; +import java.util.Stack; + import com.semmle.js.ast.AClass; import com.semmle.js.ast.AFunction; import com.semmle.js.ast.AFunctionExpression; @@ -7,6 +13,7 @@ import com.semmle.js.ast.ArrayExpression; import com.semmle.js.ast.ArrayPattern; import com.semmle.js.ast.ArrowFunctionExpression; import com.semmle.js.ast.AssignmentExpression; +import com.semmle.js.ast.AssignmentPattern; import com.semmle.js.ast.AwaitExpression; import com.semmle.js.ast.BinaryExpression; import com.semmle.js.ast.BindExpression; @@ -103,6 +110,7 @@ import com.semmle.js.extractor.ExtractorConfig.Platform; import com.semmle.js.extractor.ExtractorConfig.SourceType; import com.semmle.js.extractor.ScopeManager.DeclKind; import com.semmle.js.extractor.ScopeManager.Scope; +import com.semmle.js.parser.ParseError; import com.semmle.ts.ast.ArrayTypeExpr; import com.semmle.ts.ast.ConditionalTypeExpr; import com.semmle.ts.ast.DecoratorList; @@ -144,11 +152,6 @@ import com.semmle.ts.ast.UnionTypeExpr; import com.semmle.util.collections.CollectionUtil; import com.semmle.util.trap.TrapWriter; import com.semmle.util.trap.TrapWriter.Label; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Set; -import java.util.Stack; /** Extractor for AST-based information; invoked by the {@link JSExtractor}. */ public class ASTExtractor { @@ -307,6 +310,7 @@ public class ASTExtractor { private final Platform platform; private final SourceType sourceType; private boolean isStrict; + private List additionalErrors = new ArrayList<>(); public V(Platform platform, SourceType sourceType) { this.platform = platform; @@ -2054,14 +2058,22 @@ public class ASTExtractor { visit(nd.getRight(), key, 1, IdContext.label); return key; } + + @Override + public Label visit(AssignmentPattern nd, Context c) { + additionalErrors.add(new ParseError("Unexpected assignment pattern.", nd.getLoc().getStart())); + return super.visit(nd, c); + } } - public void extract(Node root, Platform platform, SourceType sourceType, int toplevelKind) { + public List extract(Node root, Platform platform, SourceType sourceType, int toplevelKind) { lexicalExtractor.getMetrics().startPhase(ExtractionPhase.ASTExtractor_extract); trapwriter.addTuple("toplevels", toplevelLabel, toplevelKind); locationManager.emitNodeLocation(root, toplevelLabel); - root.accept(new V(platform, sourceType), null); + V visitor = new V(platform, sourceType); + root.accept(visitor, null); lexicalExtractor.getMetrics().stopPhase(ExtractionPhase.ASTExtractor_extract); + return visitor.additionalErrors; } } diff --git a/javascript/extractor/src/com/semmle/js/extractor/JSExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/JSExtractor.java index 45dd4648220..2ce224b0225 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/JSExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/JSExtractor.java @@ -113,7 +113,7 @@ public class JSExtractor { new JSDocExtractor(textualExtractor).extract(lexicalExtractor.getComments()); lexicalExtractor.purge(); - scriptExtractor.extract(ast, platform, sourceType, toplevelKind); + parserRes.getErrors().addAll(scriptExtractor.extract(ast, platform, sourceType, toplevelKind)); new CFGExtractor(scriptExtractor).extract(ast); } else { lexicalExtractor = diff --git a/javascript/extractor/src/com/semmle/js/extractor/Main.java b/javascript/extractor/src/com/semmle/js/extractor/Main.java index 20016a0849d..4f522513873 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/Main.java +++ b/javascript/extractor/src/com/semmle/js/extractor/Main.java @@ -37,7 +37,7 @@ public class Main { * A version identifier that should be updated every time the extractor changes in such a way that * it may produce different tuples for the same file under the same {@link ExtractorConfig}. */ - public static final String EXTRACTOR_VERSION = "2019-11-26"; + public static final String EXTRACTOR_VERSION = "2020-01-06"; public static final Pattern NEWLINE = Pattern.compile("\n"); diff --git a/javascript/extractor/tests/errors/input/invalid-assignment-pattern.js b/javascript/extractor/tests/errors/input/invalid-assignment-pattern.js new file mode 100644 index 00000000000..bfe2b3cd024 --- /dev/null +++ b/javascript/extractor/tests/errors/input/invalid-assignment-pattern.js @@ -0,0 +1 @@ +(x = 0) = y diff --git a/javascript/extractor/tests/errors/output/trap/invalid-assignment-pattern.js.trap b/javascript/extractor/tests/errors/output/trap/invalid-assignment-pattern.js.trap new file mode 100644 index 00000000000..858b3e94e70 --- /dev/null +++ b/javascript/extractor/tests/errors/output/trap/invalid-assignment-pattern.js.trap @@ -0,0 +1,117 @@ +#10000=@"/invalid-assignment-pattern.js;sourcefile" +files(#10000,"/invalid-assignment-pattern.js","invalid-assignment-pattern","js",0) +#10001=@"/;folder" +folders(#10001,"/","") +containerparent(#10001,#10000) +#10002=@"loc,{#10000},0,0,0,0" +locations_default(#10002,#10000,0,0,0,0) +hasLocation(#10000,#10002) +#20000=@"global_scope" +scopes(#20000,0) +#20001=@"script;{#10000},1,1" +#20002=* +lines(#20002,#20001,"(x = 0) = y"," +") +#20003=@"loc,{#10000},1,1,1,11" +locations_default(#20003,#10000,1,1,1,11) +hasLocation(#20002,#20003) +numlines(#20001,1,1,0) +#20004=* +tokeninfo(#20004,8,#20001,0,"(") +#20005=@"loc,{#10000},1,1,1,1" +locations_default(#20005,#10000,1,1,1,1) +hasLocation(#20004,#20005) +#20006=* +tokeninfo(#20006,6,#20001,1,"x") +#20007=@"loc,{#10000},1,2,1,2" +locations_default(#20007,#10000,1,2,1,2) +hasLocation(#20006,#20007) +#20008=* +tokeninfo(#20008,8,#20001,2,"=") +#20009=@"loc,{#10000},1,4,1,4" +locations_default(#20009,#10000,1,4,1,4) +hasLocation(#20008,#20009) +#20010=* +tokeninfo(#20010,3,#20001,3,"0") +#20011=@"loc,{#10000},1,6,1,6" +locations_default(#20011,#10000,1,6,1,6) +hasLocation(#20010,#20011) +#20012=* +tokeninfo(#20012,8,#20001,4,")") +#20013=@"loc,{#10000},1,7,1,7" +locations_default(#20013,#10000,1,7,1,7) +hasLocation(#20012,#20013) +#20014=* +tokeninfo(#20014,8,#20001,5,"=") +#20015=@"loc,{#10000},1,9,1,9" +locations_default(#20015,#10000,1,9,1,9) +hasLocation(#20014,#20015) +#20016=* +tokeninfo(#20016,6,#20001,6,"y") +#20017=@"loc,{#10000},1,11,1,11" +locations_default(#20017,#10000,1,11,1,11) +hasLocation(#20016,#20017) +#20018=* +tokeninfo(#20018,0,#20001,7,"") +#20019=@"loc,{#10000},2,1,2,0" +locations_default(#20019,#10000,2,1,2,0) +hasLocation(#20018,#20019) +toplevels(#20001,0) +#20020=@"loc,{#10000},1,1,2,0" +locations_default(#20020,#10000,1,1,2,0) +hasLocation(#20001,#20020) +#20021=* +stmts(#20021,2,#20001,0,"(x = 0) = y") +hasLocation(#20021,#20003) +stmtContainers(#20021,#20001) +#20022=* +exprs(#20022,47,#20021,0,"(x = 0) = y") +hasLocation(#20022,#20003) +enclosingStmt(#20022,#20021) +exprContainers(#20022,#20001) +#20023=* +exprs(#20023,63,#20022,0,"(x = 0)") +#20024=@"loc,{#10000},1,1,1,7" +locations_default(#20024,#10000,1,1,1,7) +hasLocation(#20023,#20024) +enclosingStmt(#20023,#20021) +exprContainers(#20023,#20001) +#20025=* +exprs(#20025,79,#20023,0,"x") +hasLocation(#20025,#20007) +enclosingStmt(#20025,#20021) +exprContainers(#20025,#20001) +#20026=* +exprs(#20026,79,#20022,1,"y") +hasLocation(#20026,#20017) +enclosingStmt(#20026,#20021) +exprContainers(#20026,#20001) +literals("y","y",#20026) +#20027=@"var;{y};{#20000}" +variables(#20027,"y",#20000) +bind(#20026,#20027) +#20028=* +entry_cfg_node(#20028,#20001) +#20029=@"loc,{#10000},1,1,1,0" +locations_default(#20029,#10000,1,1,1,0) +hasLocation(#20028,#20029) +#20030=* +exit_cfg_node(#20030,#20001) +hasLocation(#20030,#20019) +successor(#20021,#20023) +successor(#20026,#20022) +successor(#20023,#20025) +successor(#20025,#20026) +successor(#20022,#20030) +successor(#20028,#20021) +#20031=* +jsParseErrors(#20031,#20001,"Error: Unexpected assignment pattern.","(x = 0) = y +") +hasLocation(#20031,#20007) +#20032=* +lines(#20032,#20001,"(x = 0) = y"," +") +hasLocation(#20032,#20003) +numlines(#20001,1,0,0) +numlines(#10000,1,1,0) +filetype(#10000,"javascript")