From 0a003edaa7e109c96e53593ff50eaa5b28d7e59e Mon Sep 17 00:00:00 2001 From: Aditya Sharad Date: Tue, 21 Dec 2021 19:57:06 -0800 Subject: [PATCH] JS: Improve performance of Xss::isOptionallySanitizedEdge When join-ordering and evaluating this conjunction, it is preferable to start with the relatively small set of `sanitizer` calls, then compute the set of SSA variables accessed as the arguments of those sanitizer calls, then reason about how those variables are used in phi nodes. Use directional binding pragmas to encourage this join order by picking `sanitizer` first, and discourage picking the opposite join order starting with `phi`. This impacts performance of the ATM XSS queries on large databases like Node, where computing all variable accesses from phi nodes leads to 435M+ tuples. --- .../javascript/security/dataflow/Xss.qll | 23 +++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/javascript/ql/lib/semmle/javascript/security/dataflow/Xss.qll b/javascript/ql/lib/semmle/javascript/security/dataflow/Xss.qll index 501eed347c5..2f27bb69b9b 100644 --- a/javascript/ql/lib/semmle/javascript/security/dataflow/Xss.qll +++ b/javascript/ql/lib/semmle/javascript/security/dataflow/Xss.qll @@ -437,8 +437,27 @@ module DomBasedXss { b = phi.getAnInput().getDefinition() and count(phi.getAnInput()) = 2 and not a = b and - sanitizer = DataFlow::valueNode(a.getDef().getSource()) and - sanitizer.getAnArgument().asExpr().(VarAccess).getVariable() = b.getSourceVariable() + /* + * Performance optimisation: + * + * When join-ordering and evaluating this conjunction, + * it is preferable to start with the relatively small set of + * `sanitizer` calls, then compute the set of SSA variables accessed + * as the arguments of those sanitizer calls, then reason about how + * those variables are used in phi nodes. + * + * Use directional binding pragmas to encourage this join order, + * starting with `sanitizer`. + * + * Without these pragmas, the join orderer may choose the opposite order: + * start with all `phi` nodes, then compute the set of SSA variables involved, + * then the (potentially large) set of accesses to those variables, + * then the set of accesses used as the argument of a sanitizer call. + */ + + pragma[only_bind_out](sanitizer) = DataFlow::valueNode(a.getDef().getSource()) and + pragma[only_bind_out](sanitizer.getAnArgument().asExpr()) = + b.getSourceVariable().getAnAccess() | pred = DataFlow::ssaDefinitionNode(b) and succ = DataFlow::ssaDefinitionNode(phi)