From e1801f3a297aaec8b3eaa585f128a279cdcbb419 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 9 Aug 2024 16:20:41 +0200 Subject: [PATCH] Python: Proper threat-model handling for argparse --- .../semmle/python/frameworks/Stdlib.model.yml | 10 +++++++++- .../lib/semmle/python/frameworks/Stdlib.qll | 20 +++++++++++++++++++ .../frameworks/stdlib/threat_models.py | 8 ++++---- 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml b/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml index f3bfd5ac889..ea5d810d8db 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml @@ -12,5 +12,13 @@ extensions: - ['sys', 'Member[argv]', 'commandargs'] - ['sys', 'Member[orig_argv]', 'commandargs'] - # TODO: argparse + # if no argument is given, the default is to use sys.argv[1:] + - ['argparse.ArgumentParser', 'Member[parse_args,parse_known_args].WithArity[0].ReturnValue', 'commandargs'] + - addsTo: + pack: codeql/python-all + extensible: summaryModel + data: + - ['argparse.ArgumentParser', 'Member[parse_args,parse_known_args]', 'Argument[0,args:]', 'ReturnValue', 'taint'] + # note: taint of attribute lookups is handled in QL + # TODO: input / read from stdin diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 3c23b392991..db20a25567c 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -4989,6 +4989,26 @@ module StdlibPrivate { override string getKind() { result = Escaping::getHtmlKind() } } + + // --------------------------------------------------------------------------- + // argparse + // --------------------------------------------------------------------------- + /** + * if result of `parse_args` is tainted (because it uses command-line arguments), + * then the parsed values accesssed on any attribute lookup is also tainted. + */ + private class ArgumentParserAnyAttributeStep extends TaintTracking::AdditionalTaintStep { + override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + nodeFrom = + API::moduleImport("argparse") + .getMember("ArgumentParser") + .getReturn() + .getMember("parse_args") + .getReturn() + .getAValueReachableFromSource() and + nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom + } + } } // --------------------------------------------------------------------------- diff --git a/python/ql/test/library-tests/frameworks/stdlib/threat_models.py b/python/ql/test/library-tests/frameworks/stdlib/threat_models.py index cac276eacad..ecaf981849f 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/threat_models.py +++ b/python/ql/test/library-tests/frameworks/stdlib/threat_models.py @@ -30,14 +30,14 @@ import argparse parser = argparse.ArgumentParser() parser.add_argument("foo") -args = parser.parse_args() # $ MISSING: threatModelSource[commandargs]=parser.parse_args() -ensure_tainted(args.foo) # $ MISSING: tainted +args = parser.parse_args() # $ threatModelSource[commandargs]=parser.parse_args() +ensure_tainted(args.foo) # $ tainted explicit_argv_parsing = parser.parse_args(sys.argv) # $ threatModelSource[commandargs]=sys.argv -ensure_tainted(explicit_argv_parsing.foo) # $ MISSING: tainted +ensure_tainted(explicit_argv_parsing.foo) # $ tainted fake_args = parser.parse_args([""]) -ensure_not_tainted(fake_args.foo) +ensure_not_tainted(fake_args.foo) # $ SPURIOUS: tainted ######################################## # reading input from stdin