From 623a80fe90909f891ff3e2ea42ade2476e883ec4 Mon Sep 17 00:00:00 2001 From: Asger F Date: Tue, 27 Nov 2018 14:44:43 +0000 Subject: [PATCH] TS: declassify files with unrecognized shebang line --- .../semmle/js/extractor/FileExtractor.java | 43 ++++++ .../tests/shebang/input/not-typescript.ts | 5 + .../shebang/input/typescript-with-shebang.ts | 4 + .../tests/shebang/input/typescript.ts | 3 + .../extractor/tests/shebang/options.json | 3 + .../trap/typescript-with-shebang.ts.trap | 129 ++++++++++++++++++ .../shebang/output/trap/typescript.ts.trap | 123 +++++++++++++++++ 7 files changed, 310 insertions(+) create mode 100644 javascript/extractor/tests/shebang/input/not-typescript.ts create mode 100644 javascript/extractor/tests/shebang/input/typescript-with-shebang.ts create mode 100644 javascript/extractor/tests/shebang/input/typescript.ts create mode 100644 javascript/extractor/tests/shebang/options.json create mode 100644 javascript/extractor/tests/shebang/output/trap/typescript-with-shebang.ts.trap create mode 100644 javascript/extractor/tests/shebang/output/trap/typescript.ts.trap diff --git a/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java index 1be577c04ae..1e88d1897cd 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/FileExtractor.java @@ -5,6 +5,7 @@ import java.io.File; import java.io.FileInputStream; import java.io.FileReader; import java.io.IOException; +import java.nio.charset.Charset; import java.util.LinkedHashSet; import java.util.Set; import java.util.regex.Pattern; @@ -38,6 +39,11 @@ public class FileExtractor { */ public static final Pattern JSON_OBJECT_START = Pattern.compile("^(?s)\\s*\\{\\s*\"([^\"]|\\\\.)*\"\\s*:.*"); + /** + * The charset for decoding UTF-8 strings. 
+ */ + private static final Charset UTF8_CHARSET = Charset.forName("UTF-8"); + /** * Information about supported file types. */ @@ -169,6 +175,11 @@ public class FileExtractor { if (isXml(bytes, length)) return true; + // Avoid files with an unrecognized shebang header. + if (hasUnrecognizedShebang(bytes, length)) { + return true; + } + return false; } catch (IOException e) { Exceptions.ignore(e, "Let extractor handle this one."); @@ -249,6 +260,38 @@ public class FileExtractor { return false; } + /** + * Returns true if the byte sequence starts with a shebang line that is not + * recognized as a JavaScript interpreter. + */ + private boolean hasUnrecognizedShebang(byte[] bytes, int length) { + // Shebangs preceded by a BOM aren't recognized in UNIX, but the BOM might only + // be present in the source file, to be stripped out in the build process. + int startIndex = skipBOM(bytes, length); + if (startIndex + 2 >= length) return false; + if (bytes[startIndex] != '#' || bytes[startIndex + 1] != '!') { + return false; + } + int endOfLine = -1; + for (int i = startIndex; i < length; ++i) { + if (bytes[i] == '\r' || bytes[i] == '\n') { + endOfLine = i; + break; + } + } + if (endOfLine == -1) { + // The shebang is either very long or there are no other lines in the file. + // Treat this as unrecognized. + return true; + } + // Extract the shebang text + int startOfText = startIndex + "#!".length(); + int lengthOfText = endOfLine - startOfText; + String text = new String(bytes, startOfText, lengthOfText, UTF8_CHARSET); + // Check if the shebang is a recognized JavaScript interpreter. 
+ return !NODE_INVOCATION.matcher(text).find(); + } + @Override public IExtractor mkExtractor(ExtractorConfig config, ExtractorState state) { return new TypeScriptExtractor(config, state.getTypeScriptParser()); diff --git a/javascript/extractor/tests/shebang/input/not-typescript.ts b/javascript/extractor/tests/shebang/input/not-typescript.ts new file mode 100644 index 00000000000..6953faca516 --- /dev/null +++ b/javascript/extractor/tests/shebang/input/not-typescript.ts @@ -0,0 +1,5 @@ +#!/usr/bin/env perl + +use strict; + +exit 0; diff --git a/javascript/extractor/tests/shebang/input/typescript-with-shebang.ts b/javascript/extractor/tests/shebang/input/typescript-with-shebang.ts new file mode 100644 index 00000000000..bed2eaa1fbb --- /dev/null +++ b/javascript/extractor/tests/shebang/input/typescript-with-shebang.ts @@ -0,0 +1,4 @@ +#!/usr/bin/env node +interface Foo { + x: number; +} diff --git a/javascript/extractor/tests/shebang/input/typescript.ts b/javascript/extractor/tests/shebang/input/typescript.ts new file mode 100644 index 00000000000..a9a86d5ce04 --- /dev/null +++ b/javascript/extractor/tests/shebang/input/typescript.ts @@ -0,0 +1,3 @@ +interface Foo { + x: number; +} diff --git a/javascript/extractor/tests/shebang/options.json b/javascript/extractor/tests/shebang/options.json new file mode 100644 index 00000000000..cdf22d686fd --- /dev/null +++ b/javascript/extractor/tests/shebang/options.json @@ -0,0 +1,3 @@ +{ + "typescript": true +} diff --git a/javascript/extractor/tests/shebang/output/trap/typescript-with-shebang.ts.trap b/javascript/extractor/tests/shebang/output/trap/typescript-with-shebang.ts.trap new file mode 100644 index 00000000000..cd7804866b8 --- /dev/null +++ b/javascript/extractor/tests/shebang/output/trap/typescript-with-shebang.ts.trap @@ -0,0 +1,129 @@ +#10000=@"/typescript-with-shebang.ts;sourcefile" +files(#10000,"/typescript-with-shebang.ts","typescript-with-shebang","ts",0) +#10001=@"/;folder" +folders(#10001,"/","") 
+containerparent(#10001,#10000) +#10002=@"loc,{#10000},0,0,0,0" +locations_default(#10002,#10000,0,0,0,0) +hasLocation(#10000,#10002) +#20000=@"global_scope" +scopes(#20000,0) +#20001=@"script;{#10000},1,1" +toplevels(#20001,0) +#20002=@"loc,{#10000},1,1,5,0" +locations_default(#20002,#10000,1,1,5,0) +hasLocation(#20001,#20002) +#20003=@"local_type_name;{Foo};{#20000}" +local_type_names(#20003,"Foo",#20000) +#20004=* +stmts(#20004,34,#20001,0,"#!/usr/ ... mber;\n}") +#20005=@"loc,{#10000},1,1,4,1" +locations_default(#20005,#10000,1,1,4,1) +hasLocation(#20004,#20005) +stmtContainers(#20004,#20001) +#20006=* +typeexprs(#20006,1,#20004,0,"Foo") +#20007=@"loc,{#10000},2,11,2,13" +locations_default(#20007,#10000,2,11,2,13) +hasLocation(#20006,#20007) +enclosingStmt(#20006,#20004) +exprContainers(#20006,#20001) +literals("Foo","Foo",#20006) +typedecl(#20006,#20003) +#20008=* +properties(#20008,#20004,2,8,"x: number;") +#20009=@"loc,{#10000},3,3,3,12" +locations_default(#20009,#10000,3,3,3,12) +hasLocation(#20008,#20009) +#20010=* +exprs(#20010,0,#20008,0,"x") +#20011=@"loc,{#10000},3,3,3,3" +locations_default(#20011,#10000,3,3,3,3) +hasLocation(#20010,#20011) +enclosingStmt(#20010,#20004) +exprContainers(#20010,#20001) +literals("x","x",#20010) +isAbstractMember(#20008) +#20012=* +typeexprs(#20012,2,#20008,2,"number") +#20013=@"loc,{#10000},3,6,3,11" +locations_default(#20013,#10000,3,6,3,11) +hasLocation(#20012,#20013) +enclosingStmt(#20012,#20004) +exprContainers(#20012,#20001) +literals("number","number",#20012) +#20014=* +lines(#20014,#20001,"#!/usr/bin/env node"," +") +#20015=@"loc,{#10000},1,1,1,19" +locations_default(#20015,#10000,1,1,1,19) +hasLocation(#20014,#20015) +#20016=* +lines(#20016,#20001,"interface Foo {"," +") +#20017=@"loc,{#10000},2,1,2,15" +locations_default(#20017,#10000,2,1,2,15) +hasLocation(#20016,#20017) +#20018=* +lines(#20018,#20001," x: number;"," +") +#20019=@"loc,{#10000},3,1,3,12" +locations_default(#20019,#10000,3,1,3,12) 
+hasLocation(#20018,#20019) +indentation(#10000,3," ",2) +#20020=* +lines(#20020,#20001,"}"," +") +#20021=@"loc,{#10000},4,1,4,1" +locations_default(#20021,#10000,4,1,4,1) +hasLocation(#20020,#20021) +numlines(#20001,4,3,0) +#20022=* +tokeninfo(#20022,7,#20001,0,"interface") +#20023=@"loc,{#10000},2,1,2,9" +locations_default(#20023,#10000,2,1,2,9) +hasLocation(#20022,#20023) +#20024=* +tokeninfo(#20024,6,#20001,1,"Foo") +hasLocation(#20024,#20007) +#20025=* +tokeninfo(#20025,8,#20001,2,"{") +#20026=@"loc,{#10000},2,15,2,15" +locations_default(#20026,#10000,2,15,2,15) +hasLocation(#20025,#20026) +#20027=* +tokeninfo(#20027,6,#20001,3,"x") +hasLocation(#20027,#20011) +#20028=* +tokeninfo(#20028,8,#20001,4,":") +#20029=@"loc,{#10000},3,4,3,4" +locations_default(#20029,#10000,3,4,3,4) +hasLocation(#20028,#20029) +#20030=* +tokeninfo(#20030,7,#20001,5,"number") +hasLocation(#20030,#20013) +#20031=* +tokeninfo(#20031,8,#20001,6,";") +#20032=@"loc,{#10000},3,12,3,12" +locations_default(#20032,#10000,3,12,3,12) +hasLocation(#20031,#20032) +#20033=* +tokeninfo(#20033,8,#20001,7,"}") +hasLocation(#20033,#20021) +#20034=* +tokeninfo(#20034,0,#20001,8,"") +#20035=@"loc,{#10000},5,1,5,0" +locations_default(#20035,#10000,5,1,5,0) +hasLocation(#20034,#20035) +#20036=* +entry_cfg_node(#20036,#20001) +#20037=@"loc,{#10000},1,1,1,0" +locations_default(#20037,#10000,1,1,1,0) +hasLocation(#20036,#20037) +#20038=* +exit_cfg_node(#20038,#20001) +hasLocation(#20038,#20035) +successor(#20004,#20038) +successor(#20036,#20004) +numlines(#10000,4,3,0) +filetype(#10000,"typescript") diff --git a/javascript/extractor/tests/shebang/output/trap/typescript.ts.trap b/javascript/extractor/tests/shebang/output/trap/typescript.ts.trap new file mode 100644 index 00000000000..936f5ce0896 --- /dev/null +++ b/javascript/extractor/tests/shebang/output/trap/typescript.ts.trap @@ -0,0 +1,123 @@ +#10000=@"/typescript.ts;sourcefile" +files(#10000,"/typescript.ts","typescript","ts",0) +#10001=@"/;folder" 
+folders(#10001,"/","") +containerparent(#10001,#10000) +#10002=@"loc,{#10000},0,0,0,0" +locations_default(#10002,#10000,0,0,0,0) +hasLocation(#10000,#10002) +#20000=@"global_scope" +scopes(#20000,0) +#20001=@"script;{#10000},1,1" +toplevels(#20001,0) +#20002=@"loc,{#10000},1,1,4,0" +locations_default(#20002,#10000,1,1,4,0) +hasLocation(#20001,#20002) +#20003=@"local_type_name;{Foo};{#20000}" +local_type_names(#20003,"Foo",#20000) +#20004=* +stmts(#20004,34,#20001,0,"interfa ... mber;\n}") +#20005=@"loc,{#10000},1,1,3,1" +locations_default(#20005,#10000,1,1,3,1) +hasLocation(#20004,#20005) +stmtContainers(#20004,#20001) +#20006=* +typeexprs(#20006,1,#20004,0,"Foo") +#20007=@"loc,{#10000},1,11,1,13" +locations_default(#20007,#10000,1,11,1,13) +hasLocation(#20006,#20007) +enclosingStmt(#20006,#20004) +exprContainers(#20006,#20001) +literals("Foo","Foo",#20006) +typedecl(#20006,#20003) +#20008=* +properties(#20008,#20004,2,8,"x: number;") +#20009=@"loc,{#10000},2,3,2,12" +locations_default(#20009,#10000,2,3,2,12) +hasLocation(#20008,#20009) +#20010=* +exprs(#20010,0,#20008,0,"x") +#20011=@"loc,{#10000},2,3,2,3" +locations_default(#20011,#10000,2,3,2,3) +hasLocation(#20010,#20011) +enclosingStmt(#20010,#20004) +exprContainers(#20010,#20001) +literals("x","x",#20010) +isAbstractMember(#20008) +#20012=* +typeexprs(#20012,2,#20008,2,"number") +#20013=@"loc,{#10000},2,6,2,11" +locations_default(#20013,#10000,2,6,2,11) +hasLocation(#20012,#20013) +enclosingStmt(#20012,#20004) +exprContainers(#20012,#20001) +literals("number","number",#20012) +#20014=* +lines(#20014,#20001,"interface Foo {"," +") +#20015=@"loc,{#10000},1,1,1,15" +locations_default(#20015,#10000,1,1,1,15) +hasLocation(#20014,#20015) +#20016=* +lines(#20016,#20001," x: number;"," +") +#20017=@"loc,{#10000},2,1,2,12" +locations_default(#20017,#10000,2,1,2,12) +hasLocation(#20016,#20017) +indentation(#10000,2," ",2) +#20018=* +lines(#20018,#20001,"}"," +") +#20019=@"loc,{#10000},3,1,3,1" 
+locations_default(#20019,#10000,3,1,3,1) +hasLocation(#20018,#20019) +numlines(#20001,3,3,0) +#20020=* +tokeninfo(#20020,7,#20001,0,"interface") +#20021=@"loc,{#10000},1,1,1,9" +locations_default(#20021,#10000,1,1,1,9) +hasLocation(#20020,#20021) +#20022=* +tokeninfo(#20022,6,#20001,1,"Foo") +hasLocation(#20022,#20007) +#20023=* +tokeninfo(#20023,8,#20001,2,"{") +#20024=@"loc,{#10000},1,15,1,15" +locations_default(#20024,#10000,1,15,1,15) +hasLocation(#20023,#20024) +#20025=* +tokeninfo(#20025,6,#20001,3,"x") +hasLocation(#20025,#20011) +#20026=* +tokeninfo(#20026,8,#20001,4,":") +#20027=@"loc,{#10000},2,4,2,4" +locations_default(#20027,#10000,2,4,2,4) +hasLocation(#20026,#20027) +#20028=* +tokeninfo(#20028,7,#20001,5,"number") +hasLocation(#20028,#20013) +#20029=* +tokeninfo(#20029,8,#20001,6,";") +#20030=@"loc,{#10000},2,12,2,12" +locations_default(#20030,#10000,2,12,2,12) +hasLocation(#20029,#20030) +#20031=* +tokeninfo(#20031,8,#20001,7,"}") +hasLocation(#20031,#20019) +#20032=* +tokeninfo(#20032,0,#20001,8,"") +#20033=@"loc,{#10000},4,1,4,0" +locations_default(#20033,#10000,4,1,4,0) +hasLocation(#20032,#20033) +#20034=* +entry_cfg_node(#20034,#20001) +#20035=@"loc,{#10000},1,1,1,0" +locations_default(#20035,#10000,1,1,1,0) +hasLocation(#20034,#20035) +#20036=* +exit_cfg_node(#20036,#20001) +hasLocation(#20036,#20033) +successor(#20004,#20036) +successor(#20034,#20004) +numlines(#10000,3,3,0) +filetype(#10000,"typescript")