Compare commits

...

10 Commits

Author SHA1 Message Date
Ian Wright
a716d39370 repatch 2021-12-17 16:59:31 +00:00
Ian Wright
335b2466a9 patch again 2021-12-17 16:49:40 +00:00
Ian Wright
96ae9617ec post cherry-pick patch 2021-12-17 16:43:07 +00:00
Henry Mercer
82029663b2 JS: Push FeaturizationConfig context into more predicates 2021-12-17 16:03:15 +00:00
Henry Mercer
4cd15ba654 JS: Only featurize endpoints that are part of a flow path 2021-12-17 15:58:46 +00:00
Ian Wright
c17c10e450 Revert "JS: Push FeaturizationConfig context into more predicates"
This reverts commit a0f479d503.
2021-12-17 15:54:03 +00:00
Henry Mercer
a0f479d503 JS: Push FeaturizationConfig context into more predicates 2021-12-17 13:54:25 +00:00
Ian Wright
24a5e8a8e1 bump the release number 2021-12-17 13:12:05 +00:00
Henry Mercer
427cdf480a JS: Update featurization for absent features optimization
Absent features are now represented implicitly by the absence of a row
in the `tokenFeatures` relation, rather than explicitly by an empty
string. This leads to improved runtime performance. To enable this
implicit representation, we pass the set of supported token features to
the `scoreEndpoints` HOP. Requires CodeQL CLI v2.7.4.
2021-12-17 13:10:10 +00:00
Ian Wright
8e1f2645cb bump the release number 2021-12-17 13:10:10 +00:00
4 changed files with 87 additions and 66 deletions

View File

@@ -6,7 +6,20 @@
import javascript import javascript
import CodeToFeatures import CodeToFeatures
import EndpointScoring private import EndpointScoring
/**
* A configuration that defines which endpoints should be featurized.
*
* This is used as a performance optimization to ensure that we only featurize the endpoints we need
* to featurize.
*
* The feature-extraction predicates shown alongside this class (for example `getTokenFeature` and
* `tokenFeatures`) constrain their `endpoint` to
* `any(FeaturizationConfig cfg).getAnEndpointToFeaturize()`, so an endpoint is featurized when at
* least one configuration requests it.
*/
abstract class FeaturizationConfig extends string {
// The characteristic predicate places no constraint on `this` (its body is `any()`);
// `bindingset[this]` declares that `this` is bound by the context in which the class is used.
// NOTE(review): concrete subclasses presumably bind `this` to an identifying string constant — confirm.
bindingset[this]
FeaturizationConfig() { any() }
/** Gets an endpoint that this configuration requests features for. */
abstract DataFlow::Node getAnEndpointToFeaturize();
}
/** /**
* Gets the value of the token-based feature named `featureName` for the endpoint `endpoint`. * Gets the value of the token-based feature named `featureName` for the endpoint `endpoint`.
@@ -14,6 +27,9 @@ import EndpointScoring
* This is a single string containing a space-separated list of tokens. * This is a single string containing a space-separated list of tokens.
*/ */
private string getTokenFeature(DataFlow::Node endpoint, string featureName) { private string getTokenFeature(DataFlow::Node endpoint, string featureName) {
// Performance optimization: Restrict feature extraction to endpoints we've explicitly asked to featurize.
endpoint = any(FeaturizationConfig cfg).getAnEndpointToFeaturize() and
(
// Features for endpoints that are contained within a function. // Features for endpoints that are contained within a function.
exists(DatabaseFeatures::Entity entity | entity = getRepresentativeEntityForEndpoint(endpoint) | exists(DatabaseFeatures::Entity entity | entity = getRepresentativeEntityForEndpoint(endpoint) |
// The name of the function that encloses the endpoint. // The name of the function that encloses the endpoint.
@@ -60,6 +76,7 @@ private string getTokenFeature(DataFlow::Node endpoint, string featureName) {
accessPath, " " accessPath, " "
) )
) )
)
} }
/** /**
@@ -77,6 +94,8 @@ private string getTokenFeature(DataFlow::Node endpoint, string featureName) {
private string getACallBasedTokenFeatureComponent( private string getACallBasedTokenFeatureComponent(
DataFlow::Node endpoint, DataFlow::CallNode call, string featureName DataFlow::Node endpoint, DataFlow::CallNode call, string featureName
) { ) {
// Performance optimization: Restrict feature extraction to endpoints we've explicitly asked to featurize.
endpoint = any(FeaturizationConfig cfg).getAnEndpointToFeaturize() and
// Features for endpoints that are an argument to a function call. // Features for endpoints that are an argument to a function call.
endpoint = call.getAnArgument() and endpoint = call.getAnArgument() and
( (
@@ -111,6 +130,9 @@ private string getACallBasedTokenFeatureComponent(
module FunctionBodies { module FunctionBodies {
/** Holds if `location` is the location of an AST node within the entity `entity` and `token` is a node attribute associated with that AST node. */ /** Holds if `location` is the location of an AST node within the entity `entity` and `token` is a node attribute associated with that AST node. */
private predicate bodyTokens(DatabaseFeatures::Entity entity, Location location, string token) { private predicate bodyTokens(DatabaseFeatures::Entity entity, Location location, string token) {
// Performance optimization: Restrict the set of entities to those containing an endpoint to featurize.
entity =
getRepresentativeEntityForEndpoint(any(FeaturizationConfig cfg).getAnEndpointToFeaturize()) and
exists(DatabaseFeatures::AstNode node | exists(DatabaseFeatures::AstNode node |
DatabaseFeatures::astNodes(entity, _, _, node, _) and DatabaseFeatures::astNodes(entity, _, _, node, _) and
token = unique(string t | DatabaseFeatures::nodeAttributes(node, t)) and token = unique(string t | DatabaseFeatures::nodeAttributes(node, t)) and
@@ -261,7 +283,7 @@ private module AccessPaths {
} }
/** Get a name of a supported generic token-based feature. */ /** Get a name of a supported generic token-based feature. */
private string getASupportedFeatureName() { string getASupportedFeatureName() {
result = result =
[ [
"enclosingFunctionName", "calleeName", "receiverName", "argumentIndex", "calleeApiName", "enclosingFunctionName", "calleeName", "receiverName", "argumentIndex", "calleeApiName",
@@ -276,13 +298,7 @@ private string getASupportedFeatureName() {
* `featureValue` for the endpoint `endpoint`. * `featureValue` for the endpoint `endpoint`.
*/ */
predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) { predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
ModelScoring::endpoints(endpoint) and // Performance optimization: Restrict feature extraction to endpoints we've explicitly asked to featurize.
( endpoint = any(FeaturizationConfig cfg).getAnEndpointToFeaturize() and
if strictcount(getTokenFeature(endpoint, featureName)) = 1 featureValue = getTokenFeature(endpoint, featureName)
then featureValue = getTokenFeature(endpoint, featureName)
else (
// Performance note: this is a Cartesian product between all endpoints and feature names.
featureValue = "" and featureName = getASupportedFeatureName()
)
)
} }

View File

@@ -80,22 +80,25 @@ DatabaseFeatures::Entity getRepresentativeEntityForEndpoint(DataFlow::Node endpo
} }
module ModelScoring { module ModelScoring {
predicate endpoints(DataFlow::Node endpoint) { /**
getCfg().isEffectiveSource(endpoint) or * A featurization config that only featurizes new candidate endpoints that are part of a flow
getCfg().isEffectiveSink(endpoint) * path.
*/
class RelevantFeaturizationConfig extends EndpointFeatures::FeaturizationConfig {
RelevantFeaturizationConfig() { this = "RelevantFeaturization" }
override DataFlow::Node getAnEndpointToFeaturize() { getCfg().isEffectiveSource(result) and any(DataFlow::Configuration cfg).hasFlow(result, _)
or
getCfg().isEffectiveSink(result) and any(DataFlow::Configuration cfg).hasFlow(_, result) }
} }
private int requestedEndpointTypes() { result = any(EndpointType type).getEncoding() } DataFlow::Node getARequestedEndpoint() { result = any(EndpointFeatures::FeaturizationConfig cfg).getAnEndpointToFeaturize() }
private predicate relevantTokenFeatures( private int getARequestedEndpointType() { result = any(EndpointType type).getEncoding() }
DataFlow::Node endpoint, string featureName, string featureValue
) {
endpoints(endpoint) and
EndpointFeatures::tokenFeatures(endpoint, featureName, featureValue)
}
predicate endpointScores(DataFlow::Node endpoint, int encodedEndpointType, float score) = predicate endpointScores(DataFlow::Node endpoint, int encodedEndpointType, float score) =
scoreEndpoints(endpoints/1, requestedEndpointTypes/0, relevantTokenFeatures/3, scoreEndpoints(getARequestedEndpoint/0, EndpointFeatures::tokenFeatures/3,
EndpointFeatures::getASupportedFeatureName/0, getARequestedEndpointType/0,
getACompatibleModelChecksum/0)(endpoint, encodedEndpointType, score) getACompatibleModelChecksum/0)(endpoint, encodedEndpointType, score)
} }
@@ -212,7 +215,9 @@ class EndpointScoringResults extends ScoringResults {
} }
module Debugging { module Debugging {
query predicate hopInputEndpoints = ModelScoring::endpoints/1; query predicate hopInputEndpoints(DataFlow::Node endpoint) {
endpoint = ModelScoring::getARequestedEndpoint()
}
query predicate endpointScores = ModelScoring::endpointScores/3; query predicate endpointScores = ModelScoring::endpointScores/3;

View File

@@ -1,5 +1,5 @@
name: codeql/javascript-experimental-atm-lib name: codeql/javascript-experimental-atm-lib
version: 0.0.0 version: 0.0.2
extractor: javascript extractor: javascript
library: true library: true
dependencies: dependencies:

View File

@@ -1,6 +1,6 @@
name: codeql/javascript-experimental-atm-queries name: codeql/javascript-experimental-atm-queries
language: javascript language: javascript
version: 0.0.0 version: 0.0.2
suites: codeql-suites suites: codeql-suites
defaultSuiteFile: codeql-suites/javascript-atm-code-scanning.qls defaultSuiteFile: codeql-suites/javascript-atm-code-scanning.qls
dependencies: dependencies: