From 5a692f22934941cbc7b17c59e89aaa8212657eb4 Mon Sep 17 00:00:00 2001 From: Henry Mercer Date: Fri, 10 Dec 2021 15:01:52 +0000 Subject: [PATCH] JS: Update featurization for absent features optimization Absent features are now represented implicitly by the absence of a row in the `tokenFeatures` relation, rather than explicitly by an empty string. This leads to improved runtime performance. To enable this implicit representation, we pass the set of supported token features to the `scoreEndpoints` higher-order predicate (HOP). Requires CodeQL CLI v2.7.4. --- .../adaptivethreatmodeling/EndpointFeatures.qll | 11 ++--------- .../adaptivethreatmodeling/EndpointScoring.qll | 6 +++--- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointFeatures.qll b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointFeatures.qll index 8853b7b8534..5ab0857b56b 100644 --- a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointFeatures.qll +++ b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointFeatures.qll @@ -292,7 +292,7 @@ private module FunctionNames { } /** Get a name of a supported generic token-based feature. */ -private string getASupportedFeatureName() { +string getASupportedFeatureName() { result = [ "enclosingFunctionName", "calleeName", "receiverName", "argumentIndex", "calleeApiName", @@ -309,12 +309,5 @@ private string getASupportedFeatureName() { predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) { // Performance optimization: Restrict feature extraction to endpoints we've explicitly asked to featurize. 
endpoint = any(FeaturizationConfig cfg).getAnEndpointToFeaturize() and - ( - if strictcount(getTokenFeature(endpoint, featureName)) = 1 - then featureValue = getTokenFeature(endpoint, featureName) - else ( - // Performance note: this is a Cartesian product between all endpoints and feature names. - featureValue = "" and featureName = getASupportedFeatureName() - ) - ) + featureValue = getTokenFeature(endpoint, featureName) } diff --git a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointScoring.qll b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointScoring.qll index 651339b3d8d..0ce13b3e180 100644 --- a/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointScoring.qll +++ b/javascript/ql/experimental/adaptivethreatmodeling/lib/experimental/adaptivethreatmodeling/EndpointScoring.qll @@ -36,9 +36,9 @@ module ModelScoring { private int getARequestedEndpointType() { result = any(EndpointType type).getEncoding() } predicate endpointScores(DataFlow::Node endpoint, int encodedEndpointType, float score) = - scoreEndpoints(getARequestedEndpoint/0, getARequestedEndpointType/0, - EndpointFeatures::tokenFeatures/3, getACompatibleModelChecksum/0)(endpoint, - encodedEndpointType, score) + scoreEndpoints(getARequestedEndpoint/0, EndpointFeatures::tokenFeatures/3, + EndpointFeatures::getASupportedFeatureName/0, getARequestedEndpointType/0, + getACompatibleModelChecksum/0)(endpoint, encodedEndpointType, score) } /**