Python: Fixup threat-models for os.environ.get()

Using `.DictionaryElementAny` doesn't actually do a store on the source
(which is what would let us later follow any dict read-steps), so the
models now mark the container itself (e.g. `os.environ`) as the source.

I added the ensure_tainted steps to highlight that the result of the
WHOLE expression ends up "tainted", and that we don't just mark
`os.environ` as the source without further flow.
This commit is contained in:
Rasmus Wriedt Larsen
2024-08-09 15:19:42 +02:00
parent b9239d7101
commit 56c85ffe54
4 changed files with 33 additions and 23 deletions

View File

@@ -5,12 +5,12 @@ extensions:
data:
- ['os', 'Member[getenv].ReturnValue', 'environment']
- ['os', 'Member[getenvb].ReturnValue', 'environment']
- ['os', 'Member[environ].DictionaryElementAny', 'environment']
- ['os', 'Member[environb].DictionaryElementAny', 'environment']
- ['posix', 'Member[environ].DictionaryElementAny', 'environment']
- ['os', 'Member[environ]', 'environment']
- ['os', 'Member[environb]', 'environment']
- ['posix', 'Member[environ]', 'environment']
- ['sys', 'Member[argv].DictionaryElementAny', 'commandargs']
- ['sys', 'Member[orig_argv].DictionaryElementAny', 'commandargs']
- ['sys', 'Member[argv]', 'commandargs']
- ['sys', 'Member[orig_argv]', 'commandargs']
# TODO: argparse
# TODO: input / read from stdin

View File

@@ -15,6 +15,7 @@ import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.RemoteFlowSources
import TestUtilities.InlineExpectationsTest
private import semmle.python.dataflow.new.internal.PrintNode
private import semmle.python.Concepts
DataFlow::Node shouldBeTainted() {
exists(DataFlow::CallCfgNode call |
@@ -45,7 +46,7 @@ module Conf {
source.(DataFlow::CfgNode).getNode() = call.getAnArg()
)
or
source instanceof RemoteFlowSource
source instanceof ThreatModelSource
}
predicate isSink(DataFlow::Node sink) {

View File

@@ -2,18 +2,25 @@ import os
import sys
import posix
os.getenv("foo") # $ threatModelSource[environment]=os.getenv(..)
os.getenvb("bar") # $ threatModelSource[environment]=os.getenvb(..)
ensure_tainted(
os.getenv("foo"), # $ tainted threatModelSource[environment]=os.getenv(..)
os.getenvb("bar"), # $ tainted threatModelSource[environment]=os.getenvb(..)
os.environ["foo"] # $ threatModelSource[environment]=os.environ["foo"]
os.environ.get("foo") # $ MISSING: threatModelSource[environment]=os.environ.get(..)
os.environ["foo"], # $ tainted threatModelSource[environment]=os.environ
os.environ.get("foo"), # $ tainted threatModelSource[environment]=os.environ
os.environb["bar"] # $ threatModelSource[environment]=os.environb["bar"]
posix.environ[b"foo"] # $ threatModelSource[environment]=posix.environ[b"foo"]
os.environb["bar"], # $ tainted threatModelSource[environment]=os.environb
posix.environ[b"foo"], # $ tainted threatModelSource[environment]=posix.environ
sys.argv[1] # $ threatModelSource[commandargs]=sys.argv[1]
sys.orig_argv[1] # $ threatModelSource[commandargs]=sys.orig_argv[1]
sys.argv[1], # $ tainted threatModelSource[commandargs]=sys.argv
sys.orig_argv[1], # $ tainted threatModelSource[commandargs]=sys.orig_argv
)
for k,v in os.environ.items(): # $ threatModelSource[environment]=os.environ
ensure_tainted(k) # $ tainted
ensure_tainted(v) # $ tainted
########################################
# argparse
@@ -23,21 +30,23 @@ import argparse
parser = argparse.ArgumentParser()
parser.add_argument("foo")
args = parser.parse_args()
args.foo # $ MISSING: threatModelSource[commandargs]=args.foo
args = parser.parse_args() # $ MISSING: threatModelSource[commandargs]=parser.parse_args()
ensure_tainted(args.foo) # $ MISSING: tainted
explicit_argv_parsing = parser.parse_args(sys.argv)
explicit_argv_parsing.foo # $ MISSING: threatModelSource[commandargs]=explicit_argv_parsing.foo
explicit_argv_parsing = parser.parse_args(sys.argv) # $ threatModelSource[commandargs]=sys.argv
ensure_tainted(explicit_argv_parsing.foo) # $ MISSING: tainted
fake_args = parser.parse_args(["<foo>"])
fake_args.foo
ensure_not_tainted(fake_args.foo)
########################################
# reading input from stdin
########################################
sys.stdin.readline() # $ MISSING: threatModelSource
input() # $ MISSING: threatModelSource
ensure_tainted(
sys.stdin.readline(), # $ MISSING: tainted threatModelSource
input(), # $ MISSING: tainted threatModelSource
)
########################################
# socket
@@ -46,4 +55,4 @@ input() # $ MISSING: threatModelSource
import socket
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.connect(("example.com", 1234))
s.recv(1024) # $ MISSING: threatModelSource[socket]
ensure_tainted(s.recv(1024)) # $ MISSING: tainted threatModelSource[socket]

View File

@@ -45,7 +45,7 @@ def func2(environ, start_response): # $ requestHandler
start_response(status, headers) # $ headerWriteBulk=headers headerWriteBulkUnsanitized=name,value
return [b"Hello"] # $ HttpResponse responseBody=List
case = sys.argv[1] # $ threatModelSource[commandargs]=sys.argv[1]
case = sys.argv[1] # $ threatModelSource[commandargs]=sys.argv
if case == "1":
server = wsgiref.simple_server.WSGIServer(ADDRESS, wsgiref.simple_server.WSGIRequestHandler)
server.set_app(func)