diff --git a/javascript/extractor/src/com/semmle/jcorn/CustomParser.java b/javascript/extractor/src/com/semmle/jcorn/CustomParser.java index b780cb10eaa..fbcf40f6b1a 100644 --- a/javascript/extractor/src/com/semmle/jcorn/CustomParser.java +++ b/javascript/extractor/src/com/semmle/jcorn/CustomParser.java @@ -545,22 +545,52 @@ public class CustomParser extends FlowParser { } @Override - protected Either jsx_readChunk(StringBuilder out, int chunkStart, int ch) { - // skip HTML comments (which are allowed in E4X, but not in JSX) - if (this.options.e4x() && ch == '<' && charAt(this.pos+1) == '!' && - charAt(this.pos+2) == '-' && charAt(this.pos+3) == '-') { - out.append(inputSubstring(chunkStart, this.pos)); - this.pos += 4; - while (this.pos+2 < this.input.length()) { - if (charAt(this.pos) == '-' && charAt(this.pos+1) == '-' && charAt(this.pos+2) == '>') { - this.pos += 3; + protected Token readToken(int code) { + // skip XML processing instructions (which are allowed in E4X, but not in JSX) + if (this.options.e4x()) { + while (code == '<') { + if (charAt(this.pos+1) == '?') { + this.pos += 2; + jsx_readUntil("?>"); + } else { break; } - ++this.pos; + this.skipSpace(); + code = this.fullCharCodeAtPos(); + } + } + return super.readToken(code); + } + + @Override + protected Either jsx_readChunk(StringBuilder out, int chunkStart, int ch) { + // skip XML comments and processing instructions (which are allowed in E4X, but not in JSX) + if (this.options.e4x() && ch == '<') { + if (inputSubstring(this.pos+1, this.pos+4).equals("!--")) { + out.append(inputSubstring(chunkStart, this.pos)); + this.pos += 4; + jsx_readUntil("-->"); + return Either.left(this.pos); + } else if (charAt(this.pos+1) == '?') { + out.append(inputSubstring(chunkStart, this.pos)); + this.pos += 2; + jsx_readUntil("?>"); + return Either.left(this.pos); } - return Either.left(this.pos); } return super.jsx_readChunk(out, chunkStart, ch); } + + private void jsx_readUntil(String terminator) { + char fst = terminator.charAt(0); + while (this.pos+terminator.length() <= this.input.length()) { + if (charAt(this.pos) == fst && + inputSubstring(this.pos, this.pos+terminator.length()).equals(terminator)) { + this.pos += terminator.length(); + break; + } + ++this.pos; + } + } } diff --git a/javascript/extractor/tests/e4x/input/tst.js b/javascript/extractor/tests/e4x/input/tst.js index 01f5a19003d..eeca15674d9 100644 --- a/javascript/extractor/tests/e4x/input/tst.js +++ b/javascript/extractor/tests/e4x/input/tst.js @@ -7,6 +7,6 @@ message.soap::Body; items.@[f()]; message.soap::[g()]; -var e = +var e = - ; \ No newline at end of file + ; diff --git a/javascript/extractor/tests/e4x/output/trap/tst.js.trap b/javascript/extractor/tests/e4x/output/trap/tst.js.trap index 054803d7bad..471f0162af1 100644 --- a/javascript/extractor/tests/e4x/output/trap/tst.js.trap +++ b/javascript/extractor/tests/e4x/output/trap/tst.js.trap @@ -64,10 +64,10 @@ lines(#20018,#20001,""," locations_default(#20019,#10000,9,1,9,0) hasLocation(#20018,#20019) #20020=* -lines(#20020,#20001,"var e = "," +lines(#20020,#20001,"var e = "," ") -#20021=@"loc,{#10000},10,1,10,13" -locations_default(#20021,#10000,10,1,10,13) +#20021=@"loc,{#10000},10,1,10,52" +locations_default(#20021,#10000,10,1,10,52) hasLocation(#20020,#20021) #20022=* lines(#20022,#20001," "," @@ -77,7 +77,8 @@ locations_default(#20023,#10000,11,1,11,18) hasLocation(#20022,#20023) indentation(#10000,11," ",2) #20024=* -lines(#20024,#20001," ;","") +lines(#20024,#20001," ;"," +") #20025=@"loc,{#10000},12,1,12,9" locations_default(#20025,#10000,12,1,12,9) hasLocation(#20024,#20025) @@ -424,26 +425,26 @@ tokeninfo(#20160,8,#20001,67,"=") locations_default(#20161,#10000,10,7,10,7) hasLocation(#20160,#20161) #20162=* -tokeninfo(#20162,8,#20001,68,"<") -#20163=@"loc,{#10000},10,9,10,9" -locations_default(#20163,#10000,10,9,10,9) +tokeninfo(#20162,8,#20001,68," <") +#20163=@"loc,{#10000},10,9,10,48" +locations_default(#20163,#10000,10,9,10,48) hasLocation(#20162,#20163) #20164=* tokeninfo(#20164,6,#20001,69,"elt") -#20165=@"loc,{#10000},10,10,10,12" -locations_default(#20165,#10000,10,10,10,12) +#20165=@"loc,{#10000},10,49,10,51" +locations_default(#20165,#10000,10,49,10,51) hasLocation(#20164,#20165) #20166=* tokeninfo(#20166,8,#20001,70,">") -#20167=@"loc,{#10000},10,13,10,13" -locations_default(#20167,#10000,10,13,10,13) +#20167=@"loc,{#10000},10,52,10,52" +locations_default(#20167,#10000,10,52,10,52) hasLocation(#20166,#20167) #20168=* tokeninfo(#20168,4,#20001,71," ") -#20169=@"loc,{#10000},10,14,12,2" -locations_default(#20169,#10000,10,14,12,2) +#20169=@"loc,{#10000},10,53,12,2" +locations_default(#20169,#10000,10,53,12,2) hasLocation(#20168,#20169) #20170=* tokeninfo(#20170,8,#20001,72,"<") @@ -472,12 +473,12 @@ locations_default(#20179,#10000,12,9,12,9) hasLocation(#20178,#20179) #20180=* tokeninfo(#20180,0,#20001,77,"") -#20181=@"loc,{#10000},12,10,12,9" -locations_default(#20181,#10000,12,10,12,9) +#20181=@"loc,{#10000},13,1,13,0" +locations_default(#20181,#10000,13,1,13,0) hasLocation(#20180,#20181) toplevels(#20001,0) -#20182=@"loc,{#10000},1,1,12,9" -locations_default(#20182,#10000,1,1,12,9) +#20182=@"loc,{#10000},1,1,13,0" +locations_default(#20182,#10000,1,1,13,0) hasLocation(#20001,#20182) #20183=@"var;{e};{#20000}" variables(#20183,"e",#20000) @@ -855,7 +856,7 @@ locations_default(#20271,#10000,10,1,12,9) hasLocation(#20270,#20271) stmtContainers(#20270,#20001) #20272=* -exprs(#20272,64,#20270,0,"e = ") +exprs(#20272,64,#20270,0,"e = ") #20273=@"loc,{#10000},10,5,12,8" locations_default(#20273,#10000,10,5,12,8) hasLocation(#20272,#20273) @@ -869,7 +870,7 @@ exprContainers(#20274,#20001) literals("e","e",#20274) decl(#20274,#20183) #20275=* -exprs(#20275,89,#20272,1,"\n ... ") +exprs(#20275,89,#20272,1,"") #20276=@"loc,{#10000},10,9,12,8" locations_default(#20276,#10000,10,9,12,8) hasLocation(#20275,#20276)