From c9fc7882e6ff1396c957cb6b1d459dc8bcdc32dd Mon Sep 17 00:00:00 2001 From: Taus Date: Tue, 1 Apr 2025 13:59:19 +0000 Subject: [PATCH] Actions: Fix bad performance in `getTargetPath` Seen on `github/codeql`, some queries had very poor performance: ``` [2/24 eval 36m4s] Evaluation done; writing results to codeql/actions-queries/Security/CWE-312/ExcessiveSecretsExposure.bqrs ``` Investigating further lead to the following worrying sequence of joins (after I ran out of patience and cancelled the query): ``` [2025-04-01 12:31:03] Tuple counts for Yaml::YamlInclude.getTargetPath/0#dispred#32565107#fb#reorder_1_0/2@i6#9f4b2jw1 after 8m40s: ... 559418 ~33% {1} r5 = SCAN `Yaml::YamlNode.getLocation/0#dispred#24555c57#prev_delta` OUTPUT In.1 ... 909345525 ~821% {3} r7 = JOIN r5 WITH `Yaml::YamlNode.getLocation/0#dispred#24555c57#prev` CARTESIAN PRODUCT OUTPUT Rhs.1, Lhs.0 'result', Rhs.0 909342139 ~779% {3} | JOIN WITH `Locations::Location.getFile/0#dispred#dcf38c8d#prev` ON FIRST 1 OUTPUT Rhs.1, Lhs.1 'result', Lhs.2 909338753 ~794% {3} | JOIN WITH containerparent_10#join_rhs ON FIRST 1 OUTPUT Rhs.1, Lhs.1 'result', Lhs.2 909335367 ~824% {3} | JOIN WITH `FileSystem::Container.getAbsolutePath/0#dispred#d234e6fa` ON FIRST 1 OUTPUT Lhs.2, Lhs.1 'result', Rhs.1 883246724 ~812% {3} | JOIN WITH `Yaml::YamlNode.getDocument/0#dispred#ee1eb3bf#bf_10#join_rhs` ON FIRST 1 OUTPUT Rhs.1 'this', Lhs.1 'result', Lhs.2 760047185 ~838% {5} | JOIN WITH yaml_scalars ON FIRST 1 OUTPUT Lhs.1 'result', Lhs.0 'this', Rhs.2, _, Lhs.2 0 ~0% {4} | REWRITE WITH Tmp.3 := "/", Out.3 := (In.4 ++ Tmp.3 ++ InOut.2), TEST Out.3 = InOut.0 KEEPING 4 {4} | REWRITE WITH NOT [TEST InOut.2 startsWith "/"] ... ``` The culprit turned out to be the following method on class `YamlInclude` ```ql private string getTargetPath() { exists(string path | path = this.getValue() | if path.matches("/%") then result = path else result = this.getDocument().getLocation().getFile().getParentContainer().getAbsolutePath() + "/" + path ) } ``` Basically, in the `else` branch, the evaluator was producing all possible values of `result` before filtering out the ones where the `path` component started with a forward slash. To fix this, I opted to factor out the logic into two helper predicates, each accounting for whether `this.getValue()` does or does not start with a `/`. With this, evaluating the original query from a clean cache takes roughly 3.3s. --- shared/yaml/codeql/yaml/Yaml.qll | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/shared/yaml/codeql/yaml/Yaml.qll b/shared/yaml/codeql/yaml/Yaml.qll index 028654de226..422be89124f 100644 --- a/shared/yaml/codeql/yaml/Yaml.qll +++ b/shared/yaml/codeql/yaml/Yaml.qll @@ -424,14 +424,23 @@ module Make { * Gets the absolute path of the file included by this directive. */ private string getTargetPath() { - exists(string path | path = this.getValue() | - if path.matches("/%") - then result = path - else - result = - this.getDocument().getLocation().getFile().getParentContainer().getAbsolutePath() + "/" + - path - ) + result = this.getAbsolutePath() + or + result = + this.getDocument().getLocation().getFile().getParentContainer().getAbsolutePath() + "/" + + this.getRelativePath() + } + + /** Join-order helper for `getTargetPath`. Gets the path but only if it is an absolute path. */ + private string getAbsolutePath() { + result = this.getValue() and + result.matches("/%") + } + + /** Join-order helper for `getTargetPath`. Gets the path, but only if it is a relative path. */ + private string getRelativePath() { + result = this.getValue() and + not result.matches("/%") } }