JS: Update featurization for absent features optimization

Absent features are now represented implicitly by the absence of a row
in the `tokenFeatures` relation, rather than explicitly by an empty
string. This improves runtime performance by avoiding a Cartesian
product between all endpoints and feature names. To enable this
implicit representation, we pass the set of supported token features to
the `scoreEndpoints` higher-order predicate (HOP). Requires CodeQL CLI
v2.7.4.
This commit is contained in:
Henry Mercer
2021-12-10 15:01:52 +00:00
parent bebf4ca8fc
commit 144ec8c629
2 changed files with 5 additions and 12 deletions

View File

@@ -283,7 +283,7 @@ private module AccessPaths {
}
/** Get a name of a supported generic token-based feature. */
private string getASupportedFeatureName() {
string getASupportedFeatureName() {
result =
[
"enclosingFunctionName", "calleeName", "receiverName", "argumentIndex", "calleeApiName",
@@ -300,12 +300,5 @@ private string getASupportedFeatureName() {
predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
// Performance optimization: Restrict feature extraction to endpoints we've explicitly asked to featurize.
endpoint = any(FeaturizationConfig cfg).getAnEndpointToFeaturize() and
(
if strictcount(getTokenFeature(endpoint, featureName)) = 1
then featureValue = getTokenFeature(endpoint, featureName)
else (
// Performance note: this is a Cartesian product between all endpoints and feature names.
featureValue = "" and featureName = getASupportedFeatureName()
)
)
featureValue = getTokenFeature(endpoint, featureName)
}

View File

@@ -101,9 +101,9 @@ module ModelScoring {
private int getARequestedEndpointType() { result = any(EndpointType type).getEncoding() }
predicate endpointScores(DataFlow::Node endpoint, int encodedEndpointType, float score) =
scoreEndpoints(getARequestedEndpoint/0, getARequestedEndpointType/0,
EndpointFeatures::tokenFeatures/3, getACompatibleModelChecksum/0)(endpoint,
encodedEndpointType, score)
scoreEndpoints(getARequestedEndpoint/0, EndpointFeatures::tokenFeatures/3,
EndpointFeatures::getASupportedFeatureName/0, getARequestedEndpointType/0,
getACompatibleModelChecksum/0)(endpoint, encodedEndpointType, score)
}
/**