Merge pull request #4108 from erik-krogh/packType

Approved by asgerf
This commit is contained in:
CodeQL CI
2020-08-25 10:17:28 +01:00
committed by GitHub
21 changed files with 275 additions and 83 deletions

View File

@@ -1,7 +1,10 @@
package com.semmle.js.extractor;
import java.io.File;
import java.nio.file.Path;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.Optional;
import com.semmle.js.parser.TypeScriptParser;
@@ -23,6 +26,8 @@ public class ExtractorState {
private final ConcurrentHashMap<Path, FileSnippet> snippets = new ConcurrentHashMap<>();
private static final ConcurrentMap<File, Optional<String>> packageTypeCache = new ConcurrentHashMap<>();
/** Returns the TypeScript parser instance held by this extractor state. */
public TypeScriptParser getTypeScriptParser() {
return typeScriptParser;
}
@@ -36,6 +41,15 @@ public class ExtractorState {
return snippets;
}
/**
 * Returns a cache for the "type" field in `package.json` files.
 *
 * <p>An empty {@link Optional} value records that a lookup was performed and yielded no type, as
 * opposed to a key that has not been looked up at all.
 *
 * <p>The map is thread-safe and may be mutated by the caller. Note that the backing field is
 * static, so the same map is returned for every {@code ExtractorState} instance.
 *
 * @return the shared, mutable cache; never null
 */
public ConcurrentMap<File, Optional<String>> getPackageTypeCache() {
  // Refer to the static field by its plain name: going through `this` wrongly suggests
  // per-instance state.
  return packageTypeCache;
}
/**
* Makes this semantically equivalent to a fresh state, but may internally retain shared resources
* that are expensive to reacquire.
@@ -43,5 +57,6 @@ public class ExtractorState {
public void reset() {
// Reset the existing parser through its own reset() instead of replacing the object.
typeScriptParser.reset();
// Forget every snippet recorded so far.
snippets.clear();
// Drop all cached `package.json` "type" lookups. The cache is a static field, so this empties
// it for every ExtractorState, not only this one.
packageTypeCache.clear();
}
}

View File

@@ -119,7 +119,7 @@ public class FileExtractor {
JS(".js", ".jsx", ".mjs", ".cjs", ".es6", ".es") {
@Override
public IExtractor mkExtractor(ExtractorConfig config, ExtractorState state) {
return new ScriptExtractor(config);
return new ScriptExtractor(config, state);
}
@Override

View File

@@ -43,7 +43,7 @@ public class Main {
* A version identifier that should be updated every time the extractor changes in such a way that
* it may produce different tuples for the same file under the same {@link ExtractorConfig}.
*/
public static final String EXTRACTOR_VERSION = "2020-08-20-2";
public static final String EXTRACTOR_VERSION = "2020-08-24";
public static final Pattern NEWLINE = Pattern.compile("\n");

View File

@@ -1,5 +1,16 @@
package com.semmle.js.extractor;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.concurrent.ConcurrentMap;
import java.util.Optional;
import com.google.gson.Gson;
import com.google.gson.JsonSyntaxException;
import com.semmle.js.extractor.ExtractorConfig.Platform;
import com.semmle.js.extractor.ExtractorConfig.SourceType;
import com.semmle.js.parser.ParseError;
@@ -9,19 +20,24 @@ import com.semmle.util.trap.TrapWriter.Label;
/** Extract a stand-alone JavaScript script. */
public class ScriptExtractor implements IExtractor {
private ExtractorConfig config;
private ConcurrentMap<File, Optional<String>> packageTypeCache;
public ScriptExtractor(ExtractorConfig config) {
/**
 * Creates a script extractor.
 *
 * @param config the extractor configuration to use
 * @param state the extractor state whose `package.json` "type" cache this extractor reads and
 *     fills
 */
public ScriptExtractor(ExtractorConfig config, ExtractorState state) {
this.config = config;
// Keep a reference to the state's cache map itself (no copy), so entries added here are
// visible through the state as well.
this.packageTypeCache = state.getPackageTypeCache();
}
/** True if files with the given extension should always be treated as modules. */
private boolean isAlwaysModule(String extension) {
return extension.equals(".mjs") || extension.equals(".es6") || extension.equals(".es");
/**
 * Decides whether a file must be treated as an ES2015 module regardless of its contents.
 *
 * <p>The extensions {@code .mjs}, {@code .es6} and {@code .es} always qualify. A {@code .js}
 * file qualifies only when the "type" read from `package.json` is {@code "module"}.
 *
 * @param extension the file extension, including the leading dot; must not be null
 * @param packageType the "type" field from `package.json`, or null if there is none
 * @return true if the file should always be treated as an ES2015 module
 */
private boolean isAlwaysModule(String extension, String packageType) {
  switch (extension) {
    case ".mjs":
    case ".es6":
    case ".es":
      return true;
    case ".js":
      return "module".equals(packageType);
    default:
      return false;
  }
}
/** True if files with the given extension should always be treated as CommonJS modules. */
private boolean isAlwaysCommonJSModule(String extension) {
return extension.equals(".cjs");
/**
 * Decides whether a file must be treated as a CommonJS module regardless of its contents.
 *
 * <p>The extension {@code .cjs} always qualifies. A {@code .js} file qualifies only when the
 * "type" read from `package.json` is {@code "commonjs"}.
 *
 * @param extension the file extension, including the leading dot; must not be null
 * @param packageType the "type" field from `package.json`, or null if there is none
 * @return true if the file should always be treated as a CommonJS module
 */
private boolean isAlwaysCommonJSModule(String extension, String packageType) {
  if (extension.equals(".cjs")) {
    return true;
  }
  boolean plainScript = extension.equals(".js");
  return plainScript && "commonjs".equals(packageType);
}
@Override
@@ -49,13 +65,16 @@ public class ScriptExtractor implements IExtractor {
locationManager.setStart(2, 1);
}
// Some file extensions are interpreted as modules by default.
String packageType = getPackageType(locationManager.getSourceFile().getParentFile());
String extension = locationManager.getSourceFileExtension();
// Some files are interpreted as modules by default.
if (config.getSourceType() == SourceType.AUTO) {
if (isAlwaysModule(locationManager.getSourceFileExtension())) {
if (isAlwaysModule(extension, packageType)) {
config = config.withSourceType(SourceType.MODULE);
}
if (isAlwaysCommonJSModule(locationManager.getSourceFileExtension())) {
config = config.withSourceType(SourceType.COMMONJS_MODULE);
if (isAlwaysCommonJSModule(extension, packageType)) {
config = config.withSourceType(SourceType.COMMONJS_MODULE).withPlatform(Platform.NODE);
}
}
@@ -78,4 +97,40 @@ public class ScriptExtractor implements IExtractor {
return loc;
}
/**
 * A minimal model of `package.json` files that can be used to read the "type" field.
 *
 * <p>Instances are created by Gson when it deserializes a `package.json` file; all other
 * properties of the file are not modelled here.
 */
private static class PackageJSON {
// Filled in by Gson from the JSON property of the same name, so the field must keep this exact
// name. Presumably left null when the property is absent (Gson default behaviour).
String type;
}
/**
 * Returns the "type" field from the nearest `package.json` file (searching up the file hierarchy).
 *
 * <p>Results are memoised per folder in {@code packageTypeCache}, including the "no type" outcome
 * and the outcome of a `package.json` that could not be read or parsed, so each folder's file is
 * read at most once until the cache is cleared.
 *
 * @param folder the folder to start searching from; may be null
 * @return the value of the "type" field, or null if {@code folder} is null or not a directory, no
 *     `package.json` is found, or the nearest one is unreadable, malformed, empty, or has no
 *     "type" field
 */
private String getPackageType(File folder) {
  if (folder == null || !folder.isDirectory()) {
    return null;
  }
  // Use a single get() instead of containsKey() followed by get(): the cache can be cleared
  // concurrently, in which case the second call would return null and be dereferenced.
  Optional<String> cached = packageTypeCache.get(folder);
  if (cached != null) {
    return cached.orElse(null);
  }
  File file = new File(folder, "package.json");
  if (file.isDirectory()) {
    // A directory that happens to be called `package.json`: give up without caching.
    return null;
  }
  String result =
      file.exists() ? readPackageType(file) : getPackageType(folder.getParentFile());
  packageTypeCache.put(folder, Optional.ofNullable(result));
  return result;
}

/**
 * Reads the "type" field of a single `package.json` file.
 *
 * @param file the `package.json` file to read
 * @return the "type" value, or null if the file cannot be read, is not valid JSON, is empty, or
 *     has no "type" field
 */
private static String readPackageType(File file) {
  // try-with-resources guarantees the file handle is released, also when parsing fails.
  try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
    PackageJSON parsed = new Gson().fromJson(reader, PackageJSON.class);
    // Gson returns null for an empty document.
    return parsed == null ? null : parsed.type;
  } catch (IOException | JsonSyntaxException e) {
    // Best effort: a broken `package.json` simply provides no type information.
    return null;
  }
}
}

View File

@@ -60,82 +60,89 @@ toplevels(#20001,0)
#20020=@"loc,{#10000},1,1,2,0"
locations_default(#20020,#10000,1,1,2,0)
hasLocation(#20001,#20020)
#20021=@"module;{#10000},1,1"
scopes(#20021,3)
scopenodes(#20001,#20021)
scopenesting(#20021,#20000)
#20022=@"var;{require};{#20021}"
variables(#20022,"require",#20021)
#20023=@"var;{module};{#20021}"
variables(#20023,"module",#20021)
#20024=@"var;{exports};{#20021}"
variables(#20024,"exports",#20021)
#20025=@"var;{__filename};{#20021}"
variables(#20025,"__filename",#20021)
#20026=@"var;{__dirname};{#20021}"
variables(#20026,"__dirname",#20021)
#20027=@"var;{arguments};{#20021}"
variables(#20027,"arguments",#20021)
#20021=@"var;{global};{#20000}"
variables(#20021,"global",#20000)
#20022=@"var;{process};{#20000}"
variables(#20022,"process",#20000)
#20023=@"var;{console};{#20000}"
variables(#20023,"console",#20000)
#20024=@"var;{Buffer};{#20000}"
variables(#20024,"Buffer",#20000)
#20025=@"module;{#10000},1,1"
scopes(#20025,3)
scopenodes(#20001,#20025)
scopenesting(#20025,#20000)
#20026=@"var;{require};{#20025}"
variables(#20026,"require",#20025)
#20027=@"var;{module};{#20025}"
variables(#20027,"module",#20025)
#20028=@"var;{exports};{#20025}"
variables(#20028,"exports",#20025)
#20029=@"var;{__filename};{#20025}"
variables(#20029,"__filename",#20025)
#20030=@"var;{__dirname};{#20025}"
variables(#20030,"__dirname",#20025)
#20031=@"var;{arguments};{#20025}"
variables(#20031,"arguments",#20025)
isModule(#20001)
#20028=*
stmts(#20028,2,#20001,0,"console ... onJS"");")
hasLocation(#20028,#20003)
stmtContainers(#20028,#20001)
#20029=*
exprs(#20029,13,#20028,0,"console ... monJS"")")
#20030=@"loc,{#10000},1,1,1,29"
locations_default(#20030,#10000,1,1,1,29)
hasLocation(#20029,#20030)
enclosingStmt(#20029,#20028)
exprContainers(#20029,#20001)
#20031=*
exprs(#20031,14,#20029,-1,"console.log")
#20032=@"loc,{#10000},1,1,1,11"
locations_default(#20032,#10000,1,1,1,11)
hasLocation(#20031,#20032)
enclosingStmt(#20031,#20028)
exprContainers(#20031,#20001)
#20032=*
stmts(#20032,2,#20001,0,"console ... onJS"");")
hasLocation(#20032,#20003)
stmtContainers(#20032,#20001)
#20033=*
exprs(#20033,79,#20031,0,"console")
hasLocation(#20033,#20005)
enclosingStmt(#20033,#20028)
exprs(#20033,13,#20032,0,"console ... monJS"")")
#20034=@"loc,{#10000},1,1,1,29"
locations_default(#20034,#10000,1,1,1,29)
hasLocation(#20033,#20034)
enclosingStmt(#20033,#20032)
exprContainers(#20033,#20001)
literals("console","console",#20033)
#20034=@"var;{console};{#20000}"
variables(#20034,"console",#20000)
bind(#20033,#20034)
#20035=*
exprs(#20035,0,#20031,1,"log")
hasLocation(#20035,#20009)
enclosingStmt(#20035,#20028)
exprs(#20035,14,#20033,-1,"console.log")
#20036=@"loc,{#10000},1,1,1,11"
locations_default(#20036,#10000,1,1,1,11)
hasLocation(#20035,#20036)
enclosingStmt(#20035,#20032)
exprContainers(#20035,#20001)
literals("log","log",#20035)
#20036=*
exprs(#20036,4,#20029,0,"""Hello CommonJS""")
hasLocation(#20036,#20013)
enclosingStmt(#20036,#20028)
exprContainers(#20036,#20001)
literals("Hello CommonJS","""Hello CommonJS""",#20036)
#20037=*
regexpterm(#20037,14,#20036,0,"Hello CommonJS")
#20038=@"loc,{#10000},1,14,1,27"
locations_default(#20038,#10000,1,14,1,27)
hasLocation(#20037,#20038)
regexpConstValue(#20037,"Hello CommonJS")
exprs(#20037,79,#20035,0,"console")
hasLocation(#20037,#20005)
enclosingStmt(#20037,#20032)
exprContainers(#20037,#20001)
literals("console","console",#20037)
bind(#20037,#20023)
#20038=*
exprs(#20038,0,#20035,1,"log")
hasLocation(#20038,#20009)
enclosingStmt(#20038,#20032)
exprContainers(#20038,#20001)
literals("log","log",#20038)
#20039=*
entry_cfg_node(#20039,#20001)
#20040=@"loc,{#10000},1,1,1,0"
locations_default(#20040,#10000,1,1,1,0)
hasLocation(#20039,#20040)
#20041=*
exit_cfg_node(#20041,#20001)
hasLocation(#20041,#20019)
successor(#20028,#20033)
successor(#20036,#20029)
successor(#20035,#20031)
successor(#20033,#20035)
successor(#20031,#20036)
successor(#20029,#20041)
successor(#20039,#20028)
exprs(#20039,4,#20033,0,"""Hello CommonJS""")
hasLocation(#20039,#20013)
enclosingStmt(#20039,#20032)
exprContainers(#20039,#20001)
literals("Hello CommonJS","""Hello CommonJS""",#20039)
#20040=*
regexpterm(#20040,14,#20039,0,"Hello CommonJS")
#20041=@"loc,{#10000},1,14,1,27"
locations_default(#20041,#10000,1,14,1,27)
hasLocation(#20040,#20041)
regexpConstValue(#20040,"Hello CommonJS")
#20042=*
entry_cfg_node(#20042,#20001)
#20043=@"loc,{#10000},1,1,1,0"
locations_default(#20043,#10000,1,1,1,0)
hasLocation(#20042,#20043)
#20044=*
exit_cfg_node(#20044,#20001)
hasLocation(#20044,#20019)
successor(#20032,#20037)
successor(#20039,#20033)
successor(#20038,#20035)
successor(#20037,#20038)
successor(#20035,#20039)
successor(#20033,#20044)
successor(#20042,#20032)
isNodejs(#20001)
numlines(#10000,1,1,0)
filetype(#10000,"javascript")

View File

@@ -0,0 +1,3 @@
// I'm invalid JSON
{
"type": "foo"

View File

@@ -0,0 +1,15 @@
#10000=@"/package.json;sourcefile"
files(#10000,"/package.json","package","json",0)
#10001=@"/;folder"
folders(#10001,"/","")
containerparent(#10001,#10000)
#10002=@"loc,{#10000},0,0,0,0"
locations_default(#10002,#10000,0,0,0,0)
hasLocation(#10000,#10002)
#20000=*
json_errors(#20000,"Error: Unexpected token")
#20001=@"loc,{#10000},3,1,3,1"
locations_default(#20001,#10000,3,1,3,1)
hasLocation(#20000,#20001)
numlines(#10000,3,0,0)
filetype(#10000,"json")

View File

@@ -0,0 +1,28 @@
#10000=@"/tst.js;sourcefile"
files(#10000,"/tst.js","tst","js",0)
#10001=@"/;folder"
folders(#10001,"/","")
containerparent(#10001,#10000)
#10002=@"loc,{#10000},0,0,0,0"
locations_default(#10002,#10000,0,0,0,0)
hasLocation(#10000,#10002)
#20000=@"global_scope"
scopes(#20000,0)
#20001=@"script;{#10000},1,1"
numlines(#20001,0,0,0)
#20002=*
tokeninfo(#20002,0,#20001,0,"")
#20003=@"loc,{#10000},1,1,1,0"
locations_default(#20003,#10000,1,1,1,0)
hasLocation(#20002,#20003)
toplevels(#20001,0)
hasLocation(#20001,#20003)
#20004=*
entry_cfg_node(#20004,#20001)
hasLocation(#20004,#20003)
#20005=*
exit_cfg_node(#20005,#20001)
hasLocation(#20005,#20003)
successor(#20004,#20005)
numlines(#10000,0,0,0)
filetype(#10000,"javascript")

View File

@@ -0,0 +1,3 @@
{
"type": 123
}

View File

@@ -0,0 +1,22 @@
#10000=@"/package.json;sourcefile"
files(#10000,"/package.json","package","json",0)
#10001=@"/;folder"
folders(#10001,"/","")
containerparent(#10001,#10000)
#10002=@"loc,{#10000},0,0,0,0"
locations_default(#10002,#10000,0,0,0,0)
hasLocation(#10000,#10002)
#20000=*
json(#20000,5,#10000,0,"{\n ""type"": 123\n}")
#20001=@"loc,{#10000},1,1,3,1"
locations_default(#20001,#10000,1,1,3,1)
json_locations(#20000,#20001)
#20002=*
json(#20002,2,#20000,0,"123")
#20003=@"loc,{#10000},2,11,2,13"
locations_default(#20003,#10000,2,11,2,13)
json_locations(#20002,#20003)
json_literals("123","123",#20002)
json_properties(#20000,"type",#20002)
numlines(#10000,3,0,0)
filetype(#10000,"json")

View File

@@ -0,0 +1,28 @@
#10000=@"/tst2.js;sourcefile"
files(#10000,"/tst2.js","tst2","js",0)
#10001=@"/;folder"
folders(#10001,"/","")
containerparent(#10001,#10000)
#10002=@"loc,{#10000},0,0,0,0"
locations_default(#10002,#10000,0,0,0,0)
hasLocation(#10000,#10002)
#20000=@"global_scope"
scopes(#20000,0)
#20001=@"script;{#10000},1,1"
numlines(#20001,0,0,0)
#20002=*
tokeninfo(#20002,0,#20001,0,"")
#20003=@"loc,{#10000},1,1,1,0"
locations_default(#20003,#10000,1,1,1,0)
hasLocation(#20002,#20003)
toplevels(#20001,0)
hasLocation(#20001,#20003)
#20004=*
entry_cfg_node(#20004,#20001)
hasLocation(#20004,#20003)
#20005=*
exit_cfg_node(#20005,#20001)
hasLocation(#20005,#20003)
successor(#20004,#20005)
numlines(#10000,0,0,0)
filetype(#10000,"javascript")

View File

@@ -0,0 +1 @@
console.log(".mjs inside a `type:\"commonjs\" is still a ES2015 module`");

View File

@@ -0,0 +1,3 @@
{
"type": "commonjs"
}

View File

@@ -0,0 +1 @@
console.log("I'm empty! The containing package.json determines the type.");

View File

@@ -0,0 +1,3 @@
{
"type": "module"
}

View File

@@ -0,0 +1 @@
console.log(".cjs inside a `type:\"module\" is still a CommonJS module`");

View File

@@ -0,0 +1 @@
console.log();

View File

@@ -0,0 +1 @@
console.log("I'm empty! The containing package.json determines the type.");

View File

@@ -1,5 +1,10 @@
| commonjs.cjs:1:1:3:16 | <toplevel> | node |
| commonjsPackage/innermjs.mjs:1:1:1:74 | <toplevel> | es2015 |
| commonjsPackage/tst.js:1:1:1:75 | <toplevel> | node |
| import.js:1:1:5:2 | <toplevel> | es2015 |
| mjs.mjs:1:1:1:32 | <toplevel> | es2015 |
| modulePackage/subdir/innercjs.cjs:1:1:1:74 | <toplevel> | node |
| modulePackage/subdir/subfile.js:1:1:1:14 | <toplevel> | es2015 |
| modulePackage/tst.js:1:1:1:75 | <toplevel> | es2015 |
| require.js:1:1:7:1 | <toplevel> | node |
| script.js:1:1:1:35 | <toplevel> | non-module |