Merge branch 'main' into ts4

This commit is contained in:
Erik Krogh Kristensen
2020-08-21 15:08:30 +02:00
277 changed files with 20078 additions and 3191 deletions

View File

@@ -669,7 +669,9 @@ public class ASTExtractor {
public Label visit(Program nd, Context c) {
contextManager.enterContainer(toplevelLabel);
isStrict = hasUseStrict(nd.getBody());
boolean prevIsStrict = isStrict;
isStrict = isStrict || hasUseStrict(nd.getBody());
// Add platform-specific globals.
scopeManager.addVariables(platform.getPredefinedGlobals());
@@ -715,6 +717,8 @@ public class ASTExtractor {
emitNodeSymbol(nd, toplevelLabel);
isStrict = prevIsStrict;
return toplevelLabel;
}

View File

@@ -144,7 +144,7 @@ import com.semmle.util.trap.TrapWriter;
*
* <ul>
* <li>All JavaScript files, that is, files with one of the extensions supported by {@link
* FileType#JS} (currently ".js", ".jsx", ".mjs", ".es6", ".es").
* FileType#JS} (currently ".js", ".jsx", ".mjs", ".cjs", ".es6", ".es").
* <li>All HTML files, that is, files with one of the extensions supported by {@link
* FileType#HTML} (currently ".htm", ".html", ".xhtm", ".xhtml", ".vue").
* <li>All YAML files, that is, files with one of the extensions supported by {@link
@@ -210,6 +210,7 @@ public class AutoBuild {
private final String defaultEncoding;
private ExecutorService threadPool;
private volatile boolean seenCode = false;
private volatile boolean seenFiles = false;
private boolean installDependencies = false;
private int installDependenciesTimeout;
private final VirtualSourceRoot virtualSourceRoot;
@@ -472,7 +473,11 @@ public class AutoBuild {
shutdownThreadPool();
}
if (!seenCode) {
warn("No JavaScript or TypeScript code found.");
if (seenFiles) {
warn("Only found JavaScript or TypeScript files that were empty or contained syntax errors.");
} else {
warn("No JavaScript or TypeScript code found.");
}
return -1;
}
return 0;
@@ -1201,6 +1206,7 @@ protected DependencyInstallationResult preparePackagesAndDependencies(Set<Path>
long start = logBeginProcess("Extracting " + file);
Integer loc = extractor.extract(f, state);
if (!extractor.getConfig().isExterns() && (loc == null || loc != 0)) seenCode = true;
if (!extractor.getConfig().isExterns()) seenFiles = true;
logEndProcess(start, "Done extracting " + file);
} catch (Throwable t) {
System.err.println("Exception while extracting " + file + ".");

View File

@@ -41,8 +41,66 @@ public class FileExtractor {
public static final Pattern JSON_OBJECT_START =
Pattern.compile("^(?s)\\s*\\{\\s*\"([^\"]|\\\\.)*\"\\s*:.*");
/** The charset for decoding UTF-8 strings. */
private static final Charset UTF8_CHARSET = Charset.forName("UTF-8");
/**
 * Scans the first {@code length} bytes (after any BOM reported by {@code skipBOM}) and reports
 * whether they contain an unprintable ASCII character or a malformed UTF-8 multibyte sequence.
 *
 * <p>A multibyte sequence that is cut off by {@code length} is not treated as malformed; the
 * scan simply stops and the method returns false.
 *
 * @param bytes the buffer to inspect
 * @param length the number of leading bytes of {@code bytes} to inspect
 * @return true if an unprintable ASCII byte or an invalid UTF-8 byte was found
 */
private static boolean hasUnprintableUtf8(byte[] bytes, int length) {
  int pos = skipBOM(bytes, length);
  while (pos < length) {
    // Mask to 0..255 so the comparisons below are not affected by sign extension.
    int lead = bytes[pos] & 0xff;
    pos++;

    if (lead < 0x80) {
      // Single-byte (ASCII) character: control characters 0-31 are unprintable, except
      // 9-13 which are whitespace; 127 is the unprintable DEL character.
      boolean isWhitespaceControl = 9 <= lead && lead <= 13;
      if ((lead < 32 && !isWhitespaceControl) || lead == 127) {
        return true;
      }
      continue;
    }

    // Lead byte of a multibyte code point: its high-order bits give the number of
    // continuation bytes that must follow.
    int continuationCount;
    if ((lead >> 5) == 0b110) {
      continuationCount = 1; // 110xxxxx 10xxxxxx
    } else if ((lead >> 4) == 0b1110) {
      continuationCount = 2; // 1110xxxx 10xxxxxx 10xxxxxx
    } else if ((lead >> 3) == 0b11110) {
      continuationCount = 3; // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    } else {
      return true; // 10xxxxxx and 11111xxx cannot start a code point.
    }

    for (int k = 0; k < continuationCount; k++, pos++) {
      if (pos >= length) {
        // Sequence runs past the inspected prefix; nothing more can be checked.
        return false;
      }
      // Every continuation byte must have the form 10xxxxxx.
      if ((bytes[pos] & 0xc0) != 0x80) {
        return true;
      }
    }
  }
  return false;
}
/**
 * Computes the offset of the first byte following a leading two-byte byte-order mark
 * ({@code FE FF} or {@code FF FE}).
 *
 * @param bytes the buffer to inspect
 * @param length the number of valid bytes in {@code bytes}
 * @return 2 if the buffer starts with one of the two marks, otherwise 0 (including when fewer
 *     than two bytes are available)
 */
private static int skipBOM(byte[] bytes, int length) {
  if (length < 2) {
    return 0;
  }
  // The second byte is only read once the first byte matches one half of a mark.
  switch (bytes[0]) {
    case (byte) 0xfe:
      return bytes[1] == (byte) 0xff ? 2 : 0;
    case (byte) 0xff:
      return bytes[1] == (byte) 0xfe ? 2 : 0;
    default:
      return 0;
  }
}
/** Information about supported file types. */
public static enum FileType {
@@ -58,7 +116,7 @@ public class FileExtractor {
}
},
JS(".js", ".jsx", ".mjs", ".es6", ".es") {
JS(".js", ".jsx", ".mjs", ".cjs", ".es6", ".es") {
@Override
public IExtractor mkExtractor(ExtractorConfig config, ExtractorState state) {
return new ScriptExtractor(config);
@@ -66,6 +124,10 @@ public class FileExtractor {
@Override
protected boolean contains(File f, String lcExt, ExtractorConfig config) {
if (isBinaryFile(f, lcExt, config)) {
return false;
}
if (super.contains(f, lcExt, config)) return true;
// detect Node.js scripts that are meant to be run from
@@ -90,6 +152,32 @@ public class FileExtractor {
public String toString() {
return "javascript";
}
/** Number of bytes to read from the beginning of a ".js" file to detect if it is a binary file. */
private static final int fileHeaderSize = 128;

/**
 * Decides whether {@code f} looks like a binary file by testing the first
 * {@code fileHeaderSize} bytes with {@code hasUnprintableUtf8}.
 *
 * <p>The check only runs when the configured default encoding is UTF-8. An empty file, or a
 * file that cannot be read, is reported as not binary.
 *
 * @param f the file to inspect
 * @param lcExt not used by this check
 * @param config extractor configuration supplying the default encoding
 * @return true if the file header contains invalid or unprintable UTF-8
 */
private boolean isBinaryFile(File f, String lcExt, ExtractorConfig config) {
  boolean utf8Expected = config.getDefaultEncoding().equals(StandardCharsets.UTF_8.name());
  if (utf8Expected) {
    try (FileInputStream header = new FileInputStream(f)) {
      byte[] buffer = new byte[fileHeaderSize];
      int bytesRead = header.read(buffer);
      // -1 means the file is empty; otherwise avoid invalid or unprintable UTF-8 files.
      return bytesRead != -1 && hasUnprintableUtf8(buffer, bytesRead);
    } catch (IOException e) {
      Exceptions.ignore(e, "Let extractor handle this one.");
    }
  }
  return false;
}
},
JSON(".json") {
@@ -160,7 +248,7 @@ public class FileExtractor {
if (length == -1) return false;
// Avoid invalid or unprintable UTF-8 files.
if (config.getDefaultEncoding().equals("UTF-8") && hasUnprintableUtf8(bytes, length)) {
if (config.getDefaultEncoding().equals(StandardCharsets.UTF_8.name()) && hasUnprintableUtf8(bytes, length)) {
return true;
}
@@ -182,17 +270,6 @@ public class FileExtractor {
return false;
}
/**
 * Computes the offset of the first byte following a leading two-byte byte-order mark
 * ({@code FE FF} or {@code FF FE}).
 *
 * @param bytes the buffer to inspect
 * @param length the number of valid bytes in {@code bytes}
 * @return 2 if the buffer starts with one of the two marks, otherwise 0 (including when fewer
 *     than two bytes are available)
 */
private int skipBOM(byte[] bytes, int length) {
  if (length < 2) {
    return 0;
  }
  // The second byte is only read once the first byte matches one half of a mark.
  switch (bytes[0]) {
    case (byte) 0xfe:
      return bytes[1] == (byte) 0xff ? 2 : 0;
    case (byte) 0xff:
      return bytes[1] == (byte) 0xfe ? 2 : 0;
    default:
      return 0;
  }
}
private boolean isXml(byte[] bytes, int length) {
int startIndex = skipBOM(bytes, length);
// Check for `<` encoded in ASCII/UTF-8 or little-endian UTF-16.
@@ -211,56 +288,6 @@ public class FileExtractor {
return s.startsWith("! TOUCHSTONE file ") || s.startsWith("[Version] 2.0");
}
/**
 * Scans the first {@code length} bytes (after any BOM reported by {@code skipBOM}) and reports
 * whether they contain an unprintable ASCII character or a malformed UTF-8 multibyte sequence.
 *
 * <p>A multibyte sequence that is cut off by {@code length} is not treated as malformed; the
 * scan simply stops and the method returns false.
 *
 * @param bytes the buffer to inspect
 * @param length the number of leading bytes of {@code bytes} to inspect
 * @return true if an unprintable ASCII byte or an invalid UTF-8 byte was found
 */
private boolean hasUnprintableUtf8(byte[] bytes, int length) {
  int pos = skipBOM(bytes, length);
  while (pos < length) {
    // Mask to 0..255 so the comparisons below are not affected by sign extension.
    int lead = bytes[pos] & 0xff;
    pos++;

    if (lead < 0x80) {
      // Single-byte (ASCII) character: control characters 0-31 are unprintable, except
      // 9-13 which are whitespace; 127 is the unprintable DEL character.
      boolean isWhitespaceControl = 9 <= lead && lead <= 13;
      if ((lead < 32 && !isWhitespaceControl) || lead == 127) {
        return true;
      }
      continue;
    }

    // Lead byte of a multibyte code point: its high-order bits give the number of
    // continuation bytes that must follow.
    int continuationCount;
    if ((lead >> 5) == 0b110) {
      continuationCount = 1; // 110xxxxx 10xxxxxx
    } else if ((lead >> 4) == 0b1110) {
      continuationCount = 2; // 1110xxxx 10xxxxxx 10xxxxxx
    } else if ((lead >> 3) == 0b11110) {
      continuationCount = 3; // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    } else {
      return true; // 10xxxxxx and 11111xxx cannot start a code point.
    }

    for (int k = 0; k < continuationCount; k++, pos++) {
      if (pos >= length) {
        // Sequence runs past the inspected prefix; nothing more can be checked.
        return false;
      }
      // Every continuation byte must have the form 10xxxxxx.
      if ((bytes[pos] & 0xc0) != 0x80) {
        return true;
      }
    }
  }
  return false;
}
/**
* Returns true if the byte sequence starts with a shebang line that is not recognized as a
* JavaScript interpreter.
@@ -288,7 +315,7 @@ public class FileExtractor {
// Extract the shebang text
int startOfText = startIndex + "#!".length();
int lengthOfText = endOfLine - startOfText;
String text = new String(bytes, startOfText, lengthOfText, UTF8_CHARSET);
String text = new String(bytes, startOfText, lengthOfText, StandardCharsets.UTF_8);
// Check if the shebang is a recognized JavaScript interpreter.
return !NODE_INVOCATION.matcher(text).find();
}

View File

@@ -43,7 +43,7 @@ public class Main {
* A version identifier that should be updated every time the extractor changes in such a way that
* it may produce different tuples for the same file under the same {@link ExtractorConfig}.
*/
public static final String EXTRACTOR_VERSION = "2020-04-01";
public static final String EXTRACTOR_VERSION = "2020-08-20-2";
public static final Pattern NEWLINE = Pattern.compile("\n");

View File

@@ -19,6 +19,11 @@ public class ScriptExtractor implements IExtractor {
return extension.equals(".mjs") || extension.equals(".es6") || extension.equals(".es");
}
/**
 * Tells whether a file extension unconditionally marks its file as a CommonJS module.
 *
 * @param extension the file extension to test, compared exactly against ".cjs"
 * @return true only for the ".cjs" extension
 */
private boolean isAlwaysCommonJSModule(String extension) {
  final String commonJsExtension = ".cjs";
  return extension.equals(commonJsExtension);
}
@Override
public LoCInfo extract(TextualExtractor textualExtractor) {
LocationManager locationManager = textualExtractor.getLocationManager();
@@ -45,9 +50,13 @@ public class ScriptExtractor implements IExtractor {
}
// Some file extensions are interpreted as modules by default.
if (isAlwaysModule(locationManager.getSourceFileExtension())) {
if (config.getSourceType() == SourceType.AUTO)
if (config.getSourceType() == SourceType.AUTO) {
if (isAlwaysModule(locationManager.getSourceFileExtension())) {
config = config.withSourceType(SourceType.MODULE);
}
if (isAlwaysCommonJSModule(locationManager.getSourceFileExtension())) {
config = config.withSourceType(SourceType.COMMONJS_MODULE);
}
}
ScopeManager scopeManager =

View File

@@ -0,0 +1 @@
console.log("Hello CommonJS");

View File

@@ -0,0 +1,141 @@
#10000=@"/tst4.cjs;sourcefile"
files(#10000,"/tst4.cjs","tst4","cjs",0)
#10001=@"/;folder"
folders(#10001,"/","")
containerparent(#10001,#10000)
#10002=@"loc,{#10000},0,0,0,0"
locations_default(#10002,#10000,0,0,0,0)
hasLocation(#10000,#10002)
#20000=@"global_scope"
scopes(#20000,0)
#20001=@"script;{#10000},1,1"
#20002=*
lines(#20002,#20001,"console.log(""Hello CommonJS"");","
")
#20003=@"loc,{#10000},1,1,1,30"
locations_default(#20003,#10000,1,1,1,30)
hasLocation(#20002,#20003)
numlines(#20001,1,1,0)
#20004=*
tokeninfo(#20004,6,#20001,0,"console")
#20005=@"loc,{#10000},1,1,1,7"
locations_default(#20005,#10000,1,1,1,7)
hasLocation(#20004,#20005)
#20006=*
tokeninfo(#20006,8,#20001,1,".")
#20007=@"loc,{#10000},1,8,1,8"
locations_default(#20007,#10000,1,8,1,8)
hasLocation(#20006,#20007)
#20008=*
tokeninfo(#20008,6,#20001,2,"log")
#20009=@"loc,{#10000},1,9,1,11"
locations_default(#20009,#10000,1,9,1,11)
hasLocation(#20008,#20009)
#20010=*
tokeninfo(#20010,8,#20001,3,"(")
#20011=@"loc,{#10000},1,12,1,12"
locations_default(#20011,#10000,1,12,1,12)
hasLocation(#20010,#20011)
#20012=*
tokeninfo(#20012,4,#20001,4,"""Hello CommonJS""")
#20013=@"loc,{#10000},1,13,1,28"
locations_default(#20013,#10000,1,13,1,28)
hasLocation(#20012,#20013)
#20014=*
tokeninfo(#20014,8,#20001,5,")")
#20015=@"loc,{#10000},1,29,1,29"
locations_default(#20015,#10000,1,29,1,29)
hasLocation(#20014,#20015)
#20016=*
tokeninfo(#20016,8,#20001,6,";")
#20017=@"loc,{#10000},1,30,1,30"
locations_default(#20017,#10000,1,30,1,30)
hasLocation(#20016,#20017)
#20018=*
tokeninfo(#20018,0,#20001,7,"")
#20019=@"loc,{#10000},2,1,2,0"
locations_default(#20019,#10000,2,1,2,0)
hasLocation(#20018,#20019)
toplevels(#20001,0)
#20020=@"loc,{#10000},1,1,2,0"
locations_default(#20020,#10000,1,1,2,0)
hasLocation(#20001,#20020)
#20021=@"module;{#10000},1,1"
scopes(#20021,3)
scopenodes(#20001,#20021)
scopenesting(#20021,#20000)
#20022=@"var;{require};{#20021}"
variables(#20022,"require",#20021)
#20023=@"var;{module};{#20021}"
variables(#20023,"module",#20021)
#20024=@"var;{exports};{#20021}"
variables(#20024,"exports",#20021)
#20025=@"var;{__filename};{#20021}"
variables(#20025,"__filename",#20021)
#20026=@"var;{__dirname};{#20021}"
variables(#20026,"__dirname",#20021)
#20027=@"var;{arguments};{#20021}"
variables(#20027,"arguments",#20021)
isModule(#20001)
#20028=*
stmts(#20028,2,#20001,0,"console ... onJS"");")
hasLocation(#20028,#20003)
stmtContainers(#20028,#20001)
#20029=*
exprs(#20029,13,#20028,0,"console ... monJS"")")
#20030=@"loc,{#10000},1,1,1,29"
locations_default(#20030,#10000,1,1,1,29)
hasLocation(#20029,#20030)
enclosingStmt(#20029,#20028)
exprContainers(#20029,#20001)
#20031=*
exprs(#20031,14,#20029,-1,"console.log")
#20032=@"loc,{#10000},1,1,1,11"
locations_default(#20032,#10000,1,1,1,11)
hasLocation(#20031,#20032)
enclosingStmt(#20031,#20028)
exprContainers(#20031,#20001)
#20033=*
exprs(#20033,79,#20031,0,"console")
hasLocation(#20033,#20005)
enclosingStmt(#20033,#20028)
exprContainers(#20033,#20001)
literals("console","console",#20033)
#20034=@"var;{console};{#20000}"
variables(#20034,"console",#20000)
bind(#20033,#20034)
#20035=*
exprs(#20035,0,#20031,1,"log")
hasLocation(#20035,#20009)
enclosingStmt(#20035,#20028)
exprContainers(#20035,#20001)
literals("log","log",#20035)
#20036=*
exprs(#20036,4,#20029,0,"""Hello CommonJS""")
hasLocation(#20036,#20013)
enclosingStmt(#20036,#20028)
exprContainers(#20036,#20001)
literals("Hello CommonJS","""Hello CommonJS""",#20036)
#20037=*
regexpterm(#20037,14,#20036,0,"Hello CommonJS")
#20038=@"loc,{#10000},1,14,1,27"
locations_default(#20038,#10000,1,14,1,27)
hasLocation(#20037,#20038)
regexpConstValue(#20037,"Hello CommonJS")
#20039=*
entry_cfg_node(#20039,#20001)
#20040=@"loc,{#10000},1,1,1,0"
locations_default(#20040,#10000,1,1,1,0)
hasLocation(#20039,#20040)
#20041=*
exit_cfg_node(#20041,#20001)
hasLocation(#20041,#20019)
successor(#20028,#20033)
successor(#20036,#20029)
successor(#20035,#20031)
successor(#20033,#20035)
successor(#20031,#20036)
successor(#20029,#20041)
successor(#20039,#20028)
numlines(#10000,1,1,0)
filetype(#10000,"javascript")

View File

@@ -311,10 +311,10 @@ scopenodes(#20001,#20112)
scopenesting(#20112,#20000)
isModule(#20001)
isES2015Module(#20001)
#20113=@"var;{fun};{#20112}"
variables(#20113,"fun",#20112)
#20114=@"var;{Class};{#20112}"
variables(#20114,"Class",#20112)
#20113=@"var;{Class};{#20112}"
variables(#20113,"Class",#20112)
#20114=@"var;{fun};{#20112}"
variables(#20114,"fun",#20112)
#20115=@"var;{Class2};{#20112}"
variables(#20115,"Class2",#20112)
#20116=@"local_type_name;{Class};{#20112}"
@@ -347,7 +347,7 @@ hasLocation(#20123,#20037)
enclosingStmt(#20123,#20118)
exprContainers(#20123,#20001)
literals("Class","Class",#20123)
decl(#20123,#20114)
decl(#20123,#20113)
typedecl(#20123,#20116)
#20124=*
scopes(#20124,10)
@@ -499,7 +499,7 @@ exprs(#20161,78,#20159,-1,"fun")
hasLocation(#20161,#20086)
exprContainers(#20161,#20159)
literals("fun","fun",#20161)
decl(#20161,#20113)
decl(#20161,#20114)
#20162=*
scopes(#20162,1)
scopenodes(#20159,#20162)

View File

@@ -155,12 +155,12 @@ scopenodes(#20001,#20055)
scopenesting(#20055,#20000)
isModule(#20001)
isES2015Module(#20001)
#20056=@"var;{f};{#20055}"
variables(#20056,"f",#20055)
#20057=@"var;{foo};{#20055}"
variables(#20057,"foo",#20055)
#20058=@"var;{C};{#20055}"
variables(#20058,"C",#20055)
#20056=@"var;{foo};{#20055}"
variables(#20056,"foo",#20055)
#20057=@"var;{C};{#20055}"
variables(#20057,"C",#20055)
#20058=@"var;{f};{#20055}"
variables(#20058,"f",#20055)
#20059=@"local_type_name;{C};{#20055}"
local_type_names(#20059,"C",#20055)
#20060=*
@@ -186,7 +186,7 @@ hasLocation(#20065,#20017)
enclosingStmt(#20065,#20061)
exprContainers(#20065,#20001)
literals("foo","foo",#20065)
decl(#20065,#20057)
decl(#20065,#20056)
#20066=*
exprs(#20066,3,#20063,1,"42")
hasLocation(#20066,#20021)
@@ -209,7 +209,7 @@ hasLocation(#20070,#20029)
enclosingStmt(#20070,#20068)
exprContainers(#20070,#20001)
literals("C","C",#20070)
decl(#20070,#20058)
decl(#20070,#20057)
typedecl(#20070,#20059)
#20071=*
scopes(#20071,10)
@@ -260,7 +260,7 @@ exprs(#20083,78,#20081,-1,"f")
hasLocation(#20083,#20041)
exprContainers(#20083,#20081)
literals("f","f",#20083)
decl(#20083,#20056)
decl(#20083,#20058)
#20084=*
scopes(#20084,1)
scopenodes(#20081,#20084)