Merge pull request #15663 from asgerf/js/endpoint-naming2

JS: Improvements to endpoint naming
This commit is contained in:
Asger F
2024-02-21 19:36:57 +01:00
committed by GitHub
21 changed files with 338 additions and 263 deletions

View File

@@ -38,22 +38,92 @@ private string join(string x, string y) {
private predicate isPackageExport(API::Node node) { node = API::moduleExport(_) }
private predicate memberEdge(API::Node pred, API::Node succ) { succ = pred.getAMember() }
/** Gets the shortest distance from a package export to `nd` in the API graph. */
private int distanceFromPackageExport(API::Node nd) =
shortestDistances(isPackageExport/1, memberEdge/2)(_, nd, result)
private predicate isExported(API::Node node) {
isPackageExport(node)
or
exists(API::Node pred |
isExported(pred) and
memberEdge(pred, node) and
not isPrivateLike(node)
/**
 * Holds if `succ` is the special `ClassInstance` node of a class whose value reaches the sink `pred`.
 *
 * This is a restricted form of `getInstance()`: it exposes instance methods for naming,
 * while leaving out side effects on `this` and on instantiations of the class.
 */
private predicate instanceEdge(API::Node pred, API::Node succ) {
  exists(DataFlow::ClassNode classNode |
    classNode = pred.getAValueReachingSink() and
    API::Internal::getClassInstance(classNode) = succ
  )
}
/**
 * Holds if the edge `pred -> succ` may be followed when constructing names.
 *
 * Such an edge is either an instance edge, or a member edge whose target is not private-like.
 */
private predicate relevantEdge(API::Node pred, API::Node succ) {
  instanceEdge(pred, succ)
  or
  not isPrivateLike(succ) and
  pred.getMember(_) = succ
}
/** Signature for a unary predicate over API nodes, used as the `isRootNode` parameter of `ApiGraphDistance`. */
private signature predicate isRootNodeSig(API::Node node);
/** Signature for a binary edge relation between API nodes, used as the `edges` parameter of `ApiGraphDistance`. */
private signature predicate edgeSig(API::Node pred, API::Node succ);
/**
 * Builds `shortestDistances` using the API graph root node as the only origin node, to ensure unique results.
 *
 * `isRootNode` selects the nodes that are reachable directly from `API::root()`, and `edges`
 * is the edge relation followed from there.
 */
private module ApiGraphDistance<isRootNodeSig/1 isRootNode, edgeSig/2 edges> {
/** Holds for every edge in `edges`, plus a synthetic entry edge from `API::root()` to each node satisfying `isRootNode`. */
private predicate edgesWithEntry(API::Node pred, API::Node succ) {
edges(pred, succ)
or
pred = API::root() and
isRootNode(succ)
}
/**
 * Gets the shortest distance from `API::root()` to `node` along `edgesWithEntry`.
 *
 * NOTE(review): because of the synthetic entry edge, nodes satisfying `isRootNode` presumably
 * get distance 1 rather than 0 - confirm against the `shortestDistances` semantics.
 */
int distanceTo(API::Node node) = shortestDistances(API::root/0, edgesWithEntry/2)(_, node, result)
}
/**
 * Gets the shortest distance from a package export to `nd` in the API graph.
 *
 * This is an alias for `distanceTo` from `ApiGraphDistance`, instantiated with
 * `isPackageExport` as the root-node predicate and `relevantEdge` as the edge relation.
 */
private predicate distanceFromPackageExport =
ApiGraphDistance<isPackageExport/1, relevantEdge/2>::distanceTo/1;
/**
 * Holds if `(package, name)` is the last-resort (fallback) name for `cls`, with the given `badness`,
 * used only so that its instance methods can still be named.
 *
 * A fallback name applies when no API node whose sink is reached by `cls` has a distance
 * from a package export, yet instances of the class can escape through some more complex
 * access pattern, for example:
 *
 *   class InternalClass {}
 *   function foo() {
 *     return new InternalClass();
 *   }
 *
 * The name is the fallback name of the enclosing module joined with the class name, and
 * `badness` is 100 higher than the badness of that module name.
 */
private predicate classHasFallbackName(
  DataFlow::ClassNode cls, string package, string name, int badness
) {
  hasEscapingInstance(cls) and
  // The class must not already be nameable via a package export.
  not exists(API::Node sink |
    sink.getAValueReachingSink() = cls and
    exists(distanceFromPackageExport(sink))
  ) and
  exists(string moduleName |
    name = join(moduleName, cls.getName()) and
    InternalModuleNaming::fallbackModuleName(cls.getTopLevel(), package, moduleName, badness - 100)
  )
}
/** Holds if `node` is the class-instance node of some class that has a fallback name. */
private predicate isClassInstanceWithFallbackName(API::Node node) {
  exists(DataFlow::ClassNode fallbackClass |
    node = API::Internal::getClassInstance(fallbackClass) and
    classHasFallbackName(fallbackClass, _, _, _)
  )
}
/**
 * Gets the shortest distance from a node with a fallback name, to `nd` in the API graph.
 *
 * This is an alias for `distanceTo` from `ApiGraphDistance`, instantiated with
 * `isClassInstanceWithFallbackName` as the root-node predicate and `relevantEdge` as the edge relation.
 */
private predicate distanceFromFallbackName =
ApiGraphDistance<isClassInstanceWithFallbackName/1, relevantEdge/2>::distanceTo/1;
/**
 * Gets the shortest distance from a name-root to `nd`.
 *
 * A name-root is either a package export or a node with a fallback name. The package-export
 * distance is used whenever it exists; otherwise the result is the fallback-name distance
 * plus a penalty of 100.
 */
private int distanceFromRoot(API::Node nd) {
  if exists(distanceFromPackageExport(nd))
  then result = distanceFromPackageExport(nd)
  else result = 100 + distanceFromFallbackName(nd)
}
/** Holds if `node` can be given a name, that is, it has some distance from a name-root. */
private predicate isRelevant(API::Node node) {
  exists(int distance | distance = distanceFromRoot(node))
}
/**
* Holds if `node` is a default export that can be reinterpreted as a namespace export,
* because the enclosing module has no named exports.
@@ -79,21 +149,29 @@ private predicate isPrivateAssignment(DataFlow::Node node) {
/** Holds if the sink of `node` is a private assignment, as determined by `isPrivateAssignment`. */
private predicate isPrivateLike(API::Node node) { isPrivateAssignment(node.asSink()) }
/**
 * Gets the badness of using `name` as a step in an access path.
 *
 * The names `constructor` and `default` have badness 10; every other name has badness 0.
 */
bindingset[name]
private int getNameBadness(string name) {
  name = ["constructor", "default"] and
  result = 10
  or
  not name = ["constructor", "default"] and
  result = 0
}
private API::Node getASuccessor(API::Node node, string name, int badness) {
isExported(node) and
exists(string member |
result = node.getMember(member) and
if member = "default"
then
if defaultExportCanBeInterpretedAsNamespaceExport(node)
isRelevant(node) and
isRelevant(result) and
(
exists(string member |
result = node.getMember(member) and
if member = "default" and defaultExportCanBeInterpretedAsNamespaceExport(node)
then (
badness = 5 and name = ""
) else (
badness = 10 and name = "default"
name = member and
badness = getNameBadness(name)
)
else (
name = member and badness = 0
)
or
instanceEdge(node, result) and
name = "prototype" and
badness = 0
)
}
@@ -114,15 +192,17 @@ private API::Node getPreferredPredecessor(API::Node node, string name, int badne
min(API::Node pred, int b |
pred = getAPredecessor(node, _, b) and
// ensure the preferred predecessor is strictly closer to a root export, even if it means accepting more badness
distanceFromPackageExport(pred) < distanceFromPackageExport(node)
distanceFromRoot(pred) < distanceFromRoot(node)
|
b
) and
result =
min(API::Node pred, string name1 |
pred = getAPredecessor(node, name1, badness)
pred = getAPredecessor(node, name1, badness) and
// ensure the preferred predecessor is strictly closer to a root export, even if it means accepting more badness
distanceFromRoot(pred) < distanceFromRoot(node)
|
pred order by distanceFromPackageExport(pred), name1
pred order by distanceFromRoot(pred), name1
) and
name = min(string n | result = getAPredecessor(node, n, badness) | n)
}
@@ -137,6 +217,12 @@ private predicate sinkHasNameCandidate(API::Node sink, string package, string na
name = "" and
badness = 0
or
exists(DataFlow::ClassNode cls, string className |
sink = API::Internal::getClassInstance(cls) and
classHasFallbackName(cls, package, className, badness) and
name = join(className, "prototype")
)
or
exists(API::Node baseNode, string baseName, int baseBadness, string step, int stepBadness |
sinkHasNameCandidate(baseNode, package, baseName, baseBadness) and
baseNode = getPreferredPredecessor(sink, step, stepBadness) and
@@ -163,80 +249,32 @@ predicate sinkHasPrimaryName(API::Node sink, string package, string name) {
sinkHasPrimaryName(sink, package, name, _)
}
/**
* Holds if `(package, name)` is an alias for `node`.
*
* This means it is a valid name for it, but was not chosen as the primary name.
*/
private predicate sinkHasAlias(API::Node sink, string package, string name) {
not sinkHasPrimaryName(sink, package, name) and
(
exists(string baseName, string step |
sinkHasPrimaryName(getAPredecessor(sink, step, _), package, baseName) and
name = join(baseName, step)
)
or
sink = API::moduleExport(package) and
name = ""
/**
 * Gets a source node that can flow to `sink` without using a return step,
 * where `t` is the state of the backwards type tracking.
 */
private DataFlow::SourceNode nodeReachingSink(API::Node sink, DataFlow::TypeBackTracker t) {
  // Recursive case: backtrack one more step, excluding return steps.
  exists(DataFlow::TypeBackTracker prev |
    t.hasReturn() = false and
    result = nodeReachingSink(sink, prev).backtrack(prev, t)
  )
  or
  // Base case: a local source of the sink itself.
  result = sink.asSink().getALocalSource() and
  t.start()
}
/** Gets a source node from which a value can reach `sink` without passing through a return step. */
DataFlow::SourceNode nodeReachingSink(API::Node sink) {
  exists(DataFlow::TypeBackTracker done | done = DataFlow::TypeBackTracker::end() |
    result = nodeReachingSink(sink, done)
  )
}
/** Gets a sink node reachable from `node`. */
bindingset[node]
private API::Node getASinkNode(DataFlow::SourceNode node) { result.getAValueReachingSink() = node }
private API::Node getASinkNode(DataFlow::SourceNode node) { node = nodeReachingSink(result) }
/**
* Holds if `node` is a declaration in an externs file.
*
* This is to ensure that functions/classes in externs are not named after a re-export in a package.
* Holds if `node` is assigned to a global access path. Note that such nodes generally do not have API nodes.
*/
private predicate nameFromExterns(DataFlow::Node node, string package, string name, int badness) {
node.getTopLevel().isExterns() and
private predicate nameFromGlobal(DataFlow::Node node, string package, string name, int badness) {
package = "global" and
node = AccessPath::getAnAssignmentTo(name) and
badness = -10
}
/**
 * Gets the badness of using `qualifiedName` as the name of a class.
 *
 * Names ending in `.constructor` have badness 10, the empty name has badness 5,
 * and all other names have badness 0.
 */
bindingset[qualifiedName]
private int getBadnessOfClassName(string qualifiedName) {
  qualifiedName.matches("%.constructor") and
  result = 10
  or
  not qualifiedName.matches("%.constructor") and
  (
    qualifiedName = "" and
    result = 5
    or
    not qualifiedName = "" and
    result = 0
  )
}
/**
 * Holds if `(package, name)` is a potential name for `cls`, with the given `badness`.
 *
 * Candidates come from the primary name of any sink node associated with `cls`
 * (penalized by `getBadnessOfClassName`), or from `nameFromExterns`.
 */
private predicate classObjectHasNameCandidate(
DataFlow::ClassNode cls, string package, string name, int badness
) {
// There can be multiple API nodes associated with `cls`.
// For example:
//
// class C {}
// module.exports.A = C; // first sink
// module.exports.B = C; // second sink
//
exists(int baseBadness |
sinkHasPrimaryName(getASinkNode(cls), package, name, baseBadness) and
badness = baseBadness + getBadnessOfClassName(name)
)
or
nameFromExterns(cls, package, name, badness)
}
/**
 * Holds if `(package, name)` is the primary name for the class object of `cls`, with the given `badness`.
 *
 * The primary name is picked among the candidates from `classObjectHasNameCandidate` by taking
 * the minimum badness first, then the minimum package among candidates with that badness,
 * and finally the minimum name among candidates with that package and badness.
 */
private predicate classObjectHasPrimaryName(
DataFlow::ClassNode cls, string package, string name, int badness
) {
badness = min(int b | classObjectHasNameCandidate(cls, _, _, b) | b) and
package = min(string p | classObjectHasNameCandidate(cls, p, _, badness) | p) and
name = min(string n | classObjectHasNameCandidate(cls, package, n, badness) | n)
}
/** Holds if `(package, name)` is the primary name for the class object of `cls`. */
predicate classObjectHasPrimaryName(DataFlow::ClassNode cls, string package, string name) {
classObjectHasPrimaryName(cls, package, name, _)
(if node.getTopLevel().isExterns() then badness = -10 else badness = 10)
}
/** Holds if an instance of `cls` can be exposed to client code. */
@@ -244,116 +282,116 @@ private predicate hasEscapingInstance(DataFlow::ClassNode cls) {
cls.getAnInstanceReference().flowsTo(any(API::Node n).asSink())
}
/**
* Holds if `(package, name)` is a potential name to use for instances of `cls`, with the given `badness`.
*/
private predicate classInstanceHasNameCandidate(
DataFlow::ClassNode cls, string package, string name, int badness
private predicate sourceNodeHasNameCandidate(
DataFlow::SourceNode node, string package, string name, int badness
) {
exists(string baseName |
classObjectHasPrimaryName(cls, package, baseName, badness) and
name = join(baseName, "prototype")
)
sinkHasPrimaryName(getASinkNode(node), package, name, badness)
or
// In case the class itself is inaccessible, but an instance is exposed via an access path,
// consider using that access path. For example:
//
// class InternalClass {}
// module.exports.foo = new InternalClass();
//
exists(int baseBadness |
sinkHasPrimaryName(getASinkNode(cls.getAnInstanceReference()), package, name, baseBadness) and
badness = baseBadness + 30 // add penalty, as we prefer to base this on the class name
)
or
// If neither the class nor its instances are accessible via an access path, but instances of the
// class can still escape via more complex access patterns, resort to a synthesized name.
// For example:
//
// class InternalClass {}
// function foo() {
// return new InternalClass();
// }
//
hasEscapingInstance(cls) and
exists(string baseName |
InternalModuleNaming::fallbackModuleName(cls.getTopLevel(), package, baseName, badness - 100) and
name = join(baseName, cls.getName()) + ".prototype"
)
nameFromGlobal(node, package, name, badness)
}
private predicate classInstanceHasPrimaryName(
DataFlow::ClassNode cls, string package, string name, int badness
private predicate sourceNodeHasPrimaryName(
DataFlow::SourceNode node, string package, string name, int badness
) {
badness = min(int b | classInstanceHasNameCandidate(cls, _, _, b) | b) and
package = min(string p | classInstanceHasNameCandidate(cls, p, _, badness) | p) and
badness = min(int b | sourceNodeHasNameCandidate(node, _, _, b) | b) and
package =
min(string p | sourceNodeHasNameCandidate(node, p, _, badness) | p order by p.length(), p) and
name =
min(string n |
classInstanceHasNameCandidate(cls, package, n, badness)
|
n order by n.length(), n
)
min(string n | sourceNodeHasNameCandidate(node, package, n, badness) | n order by n.length(), n)
}
/** Holds if instances of `cls` should primarily be referred to by `(package, name)`. */
predicate classInstanceHasPrimaryName(DataFlow::ClassNode cls, string package, string name) {
  exists(int badness | classInstanceHasPrimaryName(cls, package, name, badness))
}
/**
 * Holds if `(package, name)` is a non-primary name (an alias) referring to some instance of `cls`.
 */
predicate classInstanceHasAlias(DataFlow::ClassNode cls, string package, string name) {
  exists(int badness |
    // Badness 100 is when we start to synthesize names. Do not suggest these as aliases.
    badness < 100 and
    classInstanceHasNameCandidate(cls, package, name, badness)
  ) and
  not classInstanceHasPrimaryName(cls, package, name)
}
private predicate functionHasNameCandidate(
DataFlow::FunctionNode function, string package, string name, int badness
) {
sinkHasPrimaryName(getASinkNode(function), package, name, badness)
or
exists(DataFlow::ClassNode cls |
function = cls.getConstructor() and
classObjectHasPrimaryName(cls, package, name, badness)
/** Gets a data flow node referring to a function value. */
private DataFlow::SourceNode functionValue(DataFlow::TypeTracker t) {
t.start() and
(
result instanceof DataFlow::FunctionNode
or
exists(string baseName, string memberName |
function = cls.getInstanceMethod(memberName) and
classInstanceHasPrimaryName(cls, package, baseName, badness) and
name = join(baseName, memberName)
or
function = cls.getStaticMethod(memberName) and
classObjectHasPrimaryName(cls, package, baseName, badness) and
name = join(baseName, memberName)
)
result instanceof DataFlow::ClassNode
or
result instanceof DataFlow::PartialInvokeNode
or
result = DataFlow::globalVarRef(["Function", "eval"]).getAnInvocation()
or
// Assume double-invocation of Function also returns a function
result = DataFlow::globalVarRef("Function").getAnInvocation().getAnInvocation()
)
or
nameFromExterns(function, package, name, badness)
exists(DataFlow::TypeTracker t2 | result = functionValue(t2).track(t2, t))
}
private predicate functionHasPrimaryName(
DataFlow::FunctionNode function, string package, string name, int badness
) {
badness = min(int b | functionHasNameCandidate(function, _, _, b) | b) and
package = min(string p | functionHasNameCandidate(function, p, _, badness) | p) and
name =
min(string n |
functionHasNameCandidate(function, package, n, badness)
|
n order by n.length(), n
)
/** Gets a data flow node that refers to a function value, with type tracking run to its end state. */
private DataFlow::SourceNode functionValue() {
  exists(DataFlow::TypeTracker done | done = DataFlow::TypeTracker::end() |
    result = functionValue(done)
  )
}
/**
 * Holds if `node` is a function, a class, or a call returning a function,
 * and it either reaches some sink node or is assigned to a global access path.
 */
private predicate isFunctionSource(DataFlow::SourceNode node) {
  (
    nameFromGlobal(node, _, _, _)
    or
    exists(getASinkNode(node))
  ) and
  (
    node instanceof DataFlow::ClassNode
    or
    node instanceof DataFlow::FunctionNode
    or
    // `getASinkNode` steps through imports (but not other calls) so exclude calls that are imports (i.e. require calls)
    // as we want to get as close to the source as possible.
    not node instanceof DataFlow::ModuleImportNode and
    node instanceof DataFlow::InvokeNode and
    node = functionValue()
  )
}
/**
* Holds if `(package, name)` is the primary name for the given `function`.
*
* The `function` node may be an actual function expression, or a call site from which a function is returned.
*/
predicate functionHasPrimaryName(DataFlow::FunctionNode function, string package, string name) {
functionHasPrimaryName(function, package, name, _)
predicate functionHasPrimaryName(DataFlow::SourceNode function, string package, string name) {
sourceNodeHasPrimaryName(function, package, name, _) and
isFunctionSource(function)
}
/**
 * Holds if some source node reaching `sink` has `(package, name)` as its primary name,
 * with the given `badness`.
 */
private predicate sinkHasSourceName(API::Node sink, string package, string name, int badness) {
  exists(DataFlow::SourceNode origin |
    sourceNodeHasPrimaryName(origin, package, name, badness) and
    getASinkNode(origin) = sink
  )
}
/**
 * Holds if `(package, name)` is the one and only source name of `sink`, that is,
 * all named sources reaching `sink` agree on a single `(package, name)` pair.
 */
private predicate sinkHasPrimarySourceName(API::Node sink, string package, string name) {
  sinkHasSourceName(sink, package, name, _) and
  strictcount(string candidatePackage, string candidateName |
    sinkHasSourceName(sink, candidatePackage, candidateName, _)
  ) = 1
}
/**
 * Holds if `(package, name)`, the primary name of the sink `aliasDef`, might be an alias
 * for `(targetPackage, targetName)`, the unique source name of that sink.
 */
private predicate aliasCandidate(
  string package, string name, string targetPackage, string targetName, API::Node aliasDef
) {
  // (package, name) cannot be an alias if a source has it as its primary name
  not sinkHasSourceName(_, package, name, _) and
  sinkHasPrimarySourceName(aliasDef, targetPackage, targetName) and
  sinkHasPrimaryName(aliasDef, package, name)
}
/**
 * Holds if `(package, name)` is an alias candidate that should nevertheless be treated
 * as a primary name, because its alias target is ambiguous.
 */
private predicate nonAlias(string package, string name) {
  // Not all sinks with this name agree on the alias target
  exists(API::Node namedSink, string targetPackage, string targetName |
    sinkHasPrimaryName(namedSink, package, name) and
    aliasCandidate(package, name, targetPackage, targetName, _) and
    not sinkHasPrimarySourceName(namedSink, targetPackage, targetName)
  )
  or
  // `(package, name)` appears to be an alias for multiple things. Treat it as a primary name instead.
  strictcount(string targetPackage, string targetName |
    aliasCandidate(package, name, targetPackage, targetName, _)
  ) > 1
}
/**
* Holds if `(aliasPackage, aliasName)` is an alias for `(primaryPackage, primaryName)`,
* Holds if `(package, name)` is an alias for `(targetPackage, targetName)`,
* defined at `aliasDef`.
*
* Only the last component of an access path is reported as an alias, the prefix always
@@ -365,24 +403,10 @@ predicate functionHasPrimaryName(DataFlow::FunctionNode function, string package
* reported separately.
*/
predicate aliasDefinition(
string primaryPackage, string primaryName, string aliasPackage, string aliasName,
API::Node aliasDef
string package, string name, string targetPackage, string targetName, API::Node aliasDef
) {
exists(DataFlow::SourceNode source |
classObjectHasPrimaryName(source, primaryPackage, primaryName)
or
functionHasPrimaryName(source, primaryPackage, primaryName)
|
aliasDef.getAValueReachingSink() = source and
sinkHasPrimaryName(aliasDef, aliasPackage, aliasName, _) and
not (
primaryPackage = aliasPackage and
primaryName = aliasName
)
)
or
sinkHasPrimaryName(aliasDef, primaryPackage, primaryName) and
sinkHasAlias(aliasDef, aliasPackage, aliasName)
aliasCandidate(package, name, targetPackage, targetName, aliasDef) and
not nonAlias(package, name)
}
/**
@@ -432,8 +456,6 @@ private module InternalModuleNaming {
/** Holds if `(package, name)` should be used to refer to code inside `mod`. */
predicate fallbackModuleName(Module mod, string package, string name, int badness) {
sinkHasPrimaryName(getASinkNode(mod.getDefaultOrBulkExport()), package, name, badness)
or
badness = 50 and
package = getPackageRelativePath(mod) and
name = ""
@@ -462,28 +484,6 @@ module Debug {
)
}
/** Holds if the given `node` has multiple primary names. */
query string ambiguousClassObjectName(DataFlow::ClassNode node) {
strictcount(string package, string name | classObjectHasPrimaryName(node, package, name)) > 1 and
result =
concat(string package, string name |
classObjectHasPrimaryName(node, package, name)
|
renderName(package, name), ", "
)
}
/** Holds if the given `node` has multiple primary names. */
query string ambiguousClassInstanceName(DataFlow::ClassNode node) {
strictcount(string package, string name | classInstanceHasPrimaryName(node, package, name)) > 1 and
result =
concat(string package, string name |
classInstanceHasPrimaryName(node, package, name)
|
renderName(package, name), ", "
)
}
/** Holds if the given `node` has multiple primary names. */
query string ambiguousFunctionName(DataFlow::FunctionNode node) {
strictcount(string package, string name | functionHasPrimaryName(node, package, name)) > 1 and