From 0654e39e72233401032cac437d4fca4e21ab25e7 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Thu, 29 Sep 2022 20:30:29 +0200 Subject: [PATCH 1/2] python: rewrite type tracker for compiled regexes we have the option to use `regex.getAValueReachingSink` rather than `regex.asSink`, but it will likely be used as a sink for data flow. --- .../lib/semmle/python/frameworks/Stdlib.qll | 31 +++++++------------ 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index dc93677dea0..68c93047c70 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -2842,26 +2842,15 @@ private module StdlibPrivate { override string getName() { result = "re." + method } } - /** Helper module for tracking compiled regexes. */ - private module CompiledRegexes { - private DataFlow::TypeTrackingNode compiledRegex(DataFlow::TypeTracker t, DataFlow::Node regex) { - t.start() and - result = API::moduleImport("re").getMember("compile").getACall() and - regex in [ - result.(DataFlow::CallCfgNode).getArg(0), - result.(DataFlow::CallCfgNode).getArgByName("pattern") - ] - or - exists(DataFlow::TypeTracker t2 | result = compiledRegex(t2, regex).track(t2, t)) - } - - DataFlow::Node compiledRegex(DataFlow::Node regex) { - compiledRegex(DataFlow::TypeTracker::end(), regex).flowsTo(result) - } + API::Node compiledRegex(API::Node regex) { + exists(API::CallNode compilation | + compilation = API::moduleImport("re").getMember("compile").getACall() + | + result = compilation.getReturn() and + regex = compilation.getParameter(0, "pattern") + ) } - private import CompiledRegexes - /** * A call on compiled regular expression (obtained via `re.compile`) executing a * regular expression. @@ -2886,7 +2875,11 @@ private module StdlibPrivate { DataFlow::Node regexNode; RegexExecutionMethod method; - CompiledRegexExecution() { this.calls(compiledRegex(regexNode), method) } + CompiledRegexExecution() { + exists(API::Node regex | regexNode = regex.asSink() | + this.calls(compiledRegex(regex).getAValueReachableFromSource(), method) + ) + } override DataFlow::Node getRegex() { result = regexNode } From 84ab8606000ae5b3f51159f5ccd96a10b32e87df Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Thu, 29 Sep 2022 20:32:19 +0200 Subject: [PATCH 2/2] python: rewrite type tracker for ldap operations There are several other clean ups I would like to do in this file, but this can wait until we promote the query. --- .../semmle/python/frameworks/LDAP.qll | 28 +++---------------- 1 file changed, 4 insertions(+), 24 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/frameworks/LDAP.qll b/python/ql/src/experimental/semmle/python/frameworks/LDAP.qll index d95149bfc50..eb2cf86b338 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/LDAP.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/LDAP.qll @@ -26,11 +26,8 @@ private module Ldap { API::Node ldapInitialize() { result = ldap().getMember("initialize") } /** Gets a reference to a `ldap` operation. */ - private DataFlow::TypeTrackingNode ldapOperation(DataFlow::TypeTracker t) { - t.start() and - result.(DataFlow::AttrRead).getObject().getALocalSource() = ldapInitialize().getACall() - or - exists(DataFlow::TypeTracker t2 | result = ldapOperation(t2).track(t2, t)) + private API::Node ldapOperation(string name) { + result = ldapInitialize().getReturn().getMember(name) } /** @@ -44,24 +41,13 @@ private module Ldap { } } - /** Gets a reference to a `ldap` operation. */ - private DataFlow::Node ldapOperation() { - ldapOperation(DataFlow::TypeTracker::end()).flowsTo(result) - } - - /** Gets a reference to a `ldap` query. */ - private DataFlow::Node ldapQuery() { - result = ldapOperation() and - result.(DataFlow::AttrRead).getAttributeName() instanceof Ldap2QueryMethods - } - /** * A class to find `ldap` methods executing a query. * * See `LDAP2QueryMethods` */ private class Ldap2Query extends DataFlow::CallCfgNode, LdapQuery::Range { - Ldap2Query() { this.getFunction() = ldapQuery() } + Ldap2Query() { this = ldapOperation(any(Ldap2QueryMethods m)).getACall() } override DataFlow::Node getQuery() { result in [this.getArg(0), this.getArg(2), this.getArgByName("filterstr")] @@ -82,12 +68,6 @@ private module Ldap { } } - /** Gets a reference to a `ldap` bind. */ - private DataFlow::Node ldapBind() { - result = ldapOperation() and - result.(DataFlow::AttrRead).getAttributeName() instanceof Ldap2BindMethods - } - /**List of SSL-demanding options */ private class LdapSslOptions extends DataFlow::Node { LdapSslOptions() { @@ -101,7 +81,7 @@ private module Ldap { * See `LDAP2BindMethods` */ private class Ldap2Bind extends DataFlow::CallCfgNode, LdapBind::Range { - Ldap2Bind() { this.getFunction() = ldapBind() } + Ldap2Bind() { this = ldapOperation(any(Ldap2BindMethods m)).getACall() } override DataFlow::Node getPassword() { result in [this.getArg(1), this.getArgByName("cred")]