From 049af68bc29dfb61336f34ac2367f2c7406393f8 Mon Sep 17 00:00:00 2001 From: erik-krogh Date: Sun, 21 Aug 2022 20:34:11 +0200 Subject: [PATCH] restrict suffix-construction to relevant regexps --- .../code/java/security/regexp/NfaUtils.qll | 24 +++++++++++++------ .../javascript/security/regexp/NfaUtils.qll | 24 +++++++++++++------ .../python/security/regexp/NfaUtils.qll | 24 +++++++++++++------ .../codeql/ruby/security/regexp/NfaUtils.qll | 24 +++++++++++++------ 4 files changed, 68 insertions(+), 28 deletions(-) diff --git a/java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll b/java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll index ed406394ac6..a78e65a981f 100644 --- a/java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll +++ b/java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll @@ -1104,18 +1104,28 @@ module ReDoSPruning { result = getAnInputSymbolMatching(char) } + pragma[noinline] + RegExpRoot relevantRoot() { + exists(RegExpTerm term, State s | + s.getRepr() = term and isCandidateState(s) and result = term.getRootTerm() + ) + } + /** * Gets a char used for finding possible suffixes inside `root`. */ pragma[noinline] private string relevant(RegExpRoot root) { - exists(ascii(result)) and exists(root) - or - exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _)) - or - // The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation). - // The three chars must be kept in sync with `hasSimpleRejectEdge`. - result = ["|", "\n", "Z"] and exists(root) + root = relevantRoot() and + ( + exists(ascii(result)) and exists(root) + or + exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _)) + or + // The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation). + // The three chars must be kept in sync with `hasSimpleRejectEdge`. + result = ["|", "\n", "Z"] and exists(root) + ) } /** diff --git a/javascript/ql/lib/semmle/javascript/security/regexp/NfaUtils.qll b/javascript/ql/lib/semmle/javascript/security/regexp/NfaUtils.qll index ed406394ac6..a78e65a981f 100644 --- a/javascript/ql/lib/semmle/javascript/security/regexp/NfaUtils.qll +++ b/javascript/ql/lib/semmle/javascript/security/regexp/NfaUtils.qll @@ -1104,18 +1104,28 @@ module ReDoSPruning { result = getAnInputSymbolMatching(char) } + pragma[noinline] + RegExpRoot relevantRoot() { + exists(RegExpTerm term, State s | + s.getRepr() = term and isCandidateState(s) and result = term.getRootTerm() + ) + } + /** * Gets a char used for finding possible suffixes inside `root`. */ pragma[noinline] private string relevant(RegExpRoot root) { - exists(ascii(result)) and exists(root) - or - exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _)) - or - // The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation). - // The three chars must be kept in sync with `hasSimpleRejectEdge`. - result = ["|", "\n", "Z"] and exists(root) + root = relevantRoot() and + ( + exists(ascii(result)) and exists(root) + or + exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _)) + or + // The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation). + // The three chars must be kept in sync with `hasSimpleRejectEdge`. + result = ["|", "\n", "Z"] and exists(root) + ) } /** diff --git a/python/ql/lib/semmle/python/security/regexp/NfaUtils.qll b/python/ql/lib/semmle/python/security/regexp/NfaUtils.qll index ed406394ac6..a78e65a981f 100644 --- a/python/ql/lib/semmle/python/security/regexp/NfaUtils.qll +++ b/python/ql/lib/semmle/python/security/regexp/NfaUtils.qll @@ -1104,18 +1104,28 @@ module ReDoSPruning { result = getAnInputSymbolMatching(char) } + pragma[noinline] + RegExpRoot relevantRoot() { + exists(RegExpTerm term, State s | + s.getRepr() = term and isCandidateState(s) and result = term.getRootTerm() + ) + } + /** * Gets a char used for finding possible suffixes inside `root`. */ pragma[noinline] private string relevant(RegExpRoot root) { - exists(ascii(result)) and exists(root) - or - exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _)) - or - // The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation). - // The three chars must be kept in sync with `hasSimpleRejectEdge`. - result = ["|", "\n", "Z"] and exists(root) + root = relevantRoot() and + ( + exists(ascii(result)) and exists(root) + or + exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _)) + or + // The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation). + // The three chars must be kept in sync with `hasSimpleRejectEdge`. + result = ["|", "\n", "Z"] and exists(root) + ) } /** diff --git a/ruby/ql/lib/codeql/ruby/security/regexp/NfaUtils.qll b/ruby/ql/lib/codeql/ruby/security/regexp/NfaUtils.qll index ed406394ac6..a78e65a981f 100644 --- a/ruby/ql/lib/codeql/ruby/security/regexp/NfaUtils.qll +++ b/ruby/ql/lib/codeql/ruby/security/regexp/NfaUtils.qll @@ -1104,18 +1104,28 @@ module ReDoSPruning { result = getAnInputSymbolMatching(char) } + pragma[noinline] + RegExpRoot relevantRoot() { + exists(RegExpTerm term, State s | + s.getRepr() = term and isCandidateState(s) and result = term.getRootTerm() + ) + } + /** * Gets a char used for finding possible suffixes inside `root`. */ pragma[noinline] private string relevant(RegExpRoot root) { - exists(ascii(result)) and exists(root) - or - exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _)) - or - // The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation). - // The three chars must be kept in sync with `hasSimpleRejectEdge`. - result = ["|", "\n", "Z"] and exists(root) + root = relevantRoot() and + ( + exists(ascii(result)) and exists(root) + or + exists(InputSymbol s | belongsTo(s, root) | result = intersect(s, _)) + or + // The characters from `hasSimpleRejectEdge`. Only `\n` is really needed (as `\n` is not in the `ascii` relation). + // The three chars must be kept in sync with `hasSimpleRejectEdge`. + result = ["|", "\n", "Z"] and exists(root) + ) } /**