diff --git a/python/ql/lib/semmle/python/security/dataflow/TarSlip.qll b/python/ql/lib/semmle/python/security/dataflow/TarSlip.qll new file mode 100644 index 00000000000..90c79da1f04 --- /dev/null +++ b/python/ql/lib/semmle/python/security/dataflow/TarSlip.qll @@ -0,0 +1,29 @@ +/** + * Provides a taint-tracking configuration for detecting "command injection" vulnerabilities. + * + * Note, for performance reasons: only import this file if + * `TarSlip::Configuration` is needed, otherwise + * `TarSlipCustomizations` should be imported instead. + */ + +private import python +import semmle.python.dataflow.new.DataFlow +import semmle.python.dataflow.new.TaintTracking +import TarSlipCustomizations::TarSlip + +/** + * A taint-tracking configuration for detecting "command injection" vulnerabilities. + */ +class Configuration extends TaintTracking::Configuration { + Configuration() { this = "TarSlip" } + + override predicate isSource(DataFlow::Node source) { source instanceof Source } + + override predicate isSink(DataFlow::Node sink) { sink instanceof Sink } + + override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer } + + override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { + guard instanceof SanitizerGuard + } +} diff --git a/python/ql/lib/semmle/python/security/dataflow/TarSlipCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/TarSlipCustomizations.qll new file mode 100644 index 00000000000..7393b129f37 --- /dev/null +++ b/python/ql/lib/semmle/python/security/dataflow/TarSlipCustomizations.qll @@ -0,0 +1,145 @@ +/** + * Provides default sources, sinks and sanitizers for detecting + * "tar slip" + * vulnerabilities, as well as extension points for adding your own. + */ + +private import python +private import semmle.python.dataflow.new.DataFlow +private import semmle.python.Concepts +private import semmle.python.dataflow.new.BarrierGuards +private import semmle.python.ApiGraphs + +/** + * Provides default sources, sinks and sanitizers for detecting + * "tar slip" + * vulnerabilities, as well as extension points for adding your own. + */ +module TarSlip { + /** + * A data flow source for "tar slip" vulnerabilities. + */ + abstract class Source extends DataFlow::Node { } + + /** + * A data flow sink for "tar slip" vulnerabilities. + */ + abstract class Sink extends DataFlow::Node { } + + /** + * A sanitizer for "tar slip" vulnerabilities. + */ + abstract class Sanitizer extends DataFlow::Node { } + + /** + * A sanitizer guard for "tar slip" vulnerabilities. + */ + abstract class SanitizerGuard extends DataFlow::BarrierGuard { } + + /** + * A source of exception info, considered as a flow source. + */ + class TarfileOpen extends Source { + TarfileOpen() { + this = API::moduleImport("tarfile").getMember("open").getACall() and + // If argument refers to a string object, then it's a hardcoded path and + // this tarfile is safe. + not this.(DataFlow::CallCfgNode).getArg(0).getALocalSource().asExpr() instanceof StrConst and + /* Ignore opens within the tarfile module itself */ + not this.getLocation().getFile().getBaseName() = "tarfile.py" + } + } + + /** + * For efficiency we don't want to track the flow of taint + * around the tarfile module. + */ + class ExcludeTarFilePy extends Sanitizer { + ExcludeTarFilePy() { this.getLocation().getFile().getBaseName() = "tarfile.py" } + } + + /** + * For a call to `file.extractall` without arguments, `file` is considered a sink. + */ + class ExtractAllSink extends Sink { + ExtractAllSink() { + exists(DataFlow::CallCfgNode call | + call = + API::moduleImport("tarfile") + .getMember("open") + .getReturn() + .getMember("extractall") + .getACall() and + not exists(call.getArg(_)) and + not exists(call.getArgByName(_)) and + this = call.(DataFlow::MethodCallNode).getObject() + ) + } + } + + /** + * An argument to `extract` is considered a sink. + */ + class ExtractSink extends Sink { + ExtractSink() { + exists(DataFlow::CallCfgNode call | + call = + API::moduleImport("tarfile").getMember("open").getReturn().getMember("extract").getACall() and + this = call.getArg(0) + ) + } + } + + /* Members argument to extract method */ + class ExtractMembersSink extends Sink { + ExtractMembersSink() { + exists(DataFlow::CallCfgNode call | + call = + API::moduleImport("tarfile") + .getMember("open") + .getReturn() + .getMember("extractall") + .getACall() and + this in [call.getArg(0), call.getArgByName("members")] + ) + } + } + + class TarFileInfoSanitizer extends SanitizerGuard { + ControlFlowNode tarInfo; + + TarFileInfoSanitizer() { + exists(CallNode call, AttrNode attr | + this = call and + // We must test the name of the tar info object. + attr = call.getAnArg() and + attr.getName() = "name" and + attr.getObject() = tarInfo + | + // Assume that any test with "path" in it is a sanitizer + call.getAChild*().(AttrNode).getName().matches("%path") + or + call.getAChild*().(NameNode).getId().matches("%path") + ) + } + + override predicate checks(ControlFlowNode checked, boolean branch) { + checked = tarInfo and + branch in [true, false] + } + + DataFlow::ExprNode shouldGuard() { + tarInfo.dominates(result.asCfgNode()) and + // exists(EssaDefinition def | + // def.getAUse() = tarInfo and + // def.getAUse() = result.asCfgNode() + // ) and + exists(SsaSourceVariable v | + v.getAUse() = tarInfo and + v.getAUse() = result.asCfgNode() + ) + } + } + + DataFlow::ExprNode getAGuardedNode(TarFileInfoSanitizer tfis) { result = tfis.getAGuardedNode() } +} diff --git a/python/ql/src/Security/CWE-022/TarSlip.ql b/python/ql/src/Security/CWE-022/TarSlip.ql index 76d799a0aca..1528be7377d 100644 --- a/python/ql/src/Security/CWE-022/TarSlip.ql +++ b/python/ql/src/Security/CWE-022/TarSlip.ql @@ -13,170 +13,10 @@ */ import python -import semmle.python.security.Paths -import semmle.python.dataflow.TaintTracking -import semmle.python.security.strings.Basic +import semmle.python.security.dataflow.TarSlip +import DataFlow::PathGraph -/** A TaintKind to represent open tarfile objects. That is, the result of calling `tarfile.open(...)` */ -class OpenTarFile extends TaintKind { - OpenTarFile() { this = "tarfile.open" } - - override TaintKind getTaintOfMethodResult(string name) { - name = "getmember" and result instanceof TarFileInfo - or - name = "getmembers" and result.(SequenceKind).getItem() instanceof TarFileInfo - } - - override ClassValue getType() { result = Value::named("tarfile.TarFile") } - - override TaintKind getTaintForIteration() { result instanceof TarFileInfo } -} - -/** The source of open tarfile objects. That is, any call to `tarfile.open(...)` */ -class TarfileOpen extends TaintSource { - TarfileOpen() { - Value::named("tarfile.open").getACall() = this and - /* - * If argument refers to a string object, then it's a hardcoded path and - * this tarfile is safe. - */ - - not this.(CallNode).getAnArg().pointsTo(any(StringValue str)) and - /* Ignore opens within the tarfile module itself */ - not this.(ControlFlowNode).getLocation().getFile().getBaseName() = "tarfile.py" - } - - override predicate isSourceOf(TaintKind kind) { kind instanceof OpenTarFile } -} - -class TarFileInfo extends TaintKind { - TarFileInfo() { this = "tarfile.entry" } - - override TaintKind getTaintOfMethodResult(string name) { name = "next" and result = this } - - override TaintKind getTaintOfAttribute(string name) { - name = "name" and result instanceof TarFileInfo - } -} - -/* - * For efficiency we don't want to track the flow of taint - * around the tarfile module. - */ - -class ExcludeTarFilePy extends Sanitizer { - ExcludeTarFilePy() { this = "Tar sanitizer" } - - override predicate sanitizingNode(TaintKind taint, ControlFlowNode node) { - node.getLocation().getFile().getBaseName() = "tarfile.py" and - ( - taint instanceof OpenTarFile - or - taint instanceof TarFileInfo - or - taint.(SequenceKind).getItem() instanceof TarFileInfo - ) - } -} - -/* Any call to an extractall method */ -class ExtractAllSink extends TaintSink { - ExtractAllSink() { - exists(CallNode call | - this = call.getFunction().(AttrNode).getObject("extractall") and - not exists(call.getAnArg()) - ) - } - - override predicate sinks(TaintKind kind) { kind instanceof OpenTarFile } -} - -/* Argument to extract method */ -class ExtractSink extends TaintSink { - CallNode call; - - ExtractSink() { - call.getFunction().(AttrNode).getName() = "extract" and - this = call.getArg(0) - } - - override predicate sinks(TaintKind kind) { kind instanceof TarFileInfo } -} - -/* Members argument to extract method */ -class ExtractMembersSink extends TaintSink { - CallNode call; - - ExtractMembersSink() { - call.getFunction().(AttrNode).getName() = "extractall" and - (this = call.getArg(0) or this = call.getArgByName("members")) - } - - override predicate sinks(TaintKind kind) { - kind.(SequenceKind).getItem() instanceof TarFileInfo - or - kind instanceof OpenTarFile - } -} - -class TarFileInfoSanitizer extends Sanitizer { - TarFileInfoSanitizer() { this = "TarInfo sanitizer" } - - /* The test `if :` clears taint on its `false` edge. */ - override predicate sanitizingEdge(TaintKind taint, PyEdgeRefinement test) { - taint instanceof TarFileInfo and - clears_taint_on_false_edge(test.getTest(), test.getSense()) - } - - private predicate clears_taint_on_false_edge(ControlFlowNode test, boolean sense) { - path_sanitizing_test(test) and - sense = false - or - // handle `not` (also nested) - test.(UnaryExprNode).getNode().getOp() instanceof Not and - clears_taint_on_false_edge(test.(UnaryExprNode).getOperand(), sense.booleanNot()) - } -} - -private predicate path_sanitizing_test(ControlFlowNode test) { - /* Assume that any test with "path" in it is a sanitizer */ - test.getAChild+().(AttrNode).getName().matches("%path") - or - test.getAChild+().(NameNode).getId().matches("%path") -} - -class TarSlipConfiguration extends TaintTracking::Configuration { - TarSlipConfiguration() { this = "TarSlip configuration" } - - override predicate isSource(TaintTracking::Source source) { source instanceof TarfileOpen } - - override predicate isSink(TaintTracking::Sink sink) { - sink instanceof ExtractSink or - sink instanceof ExtractAllSink or - sink instanceof ExtractMembersSink - } - - override predicate isSanitizer(Sanitizer sanitizer) { - sanitizer instanceof TarFileInfoSanitizer - or - sanitizer instanceof ExcludeTarFilePy - } - - override predicate isBarrier(DataFlow::Node node) { - // Avoid flow into the tarfile module - exists(ParameterDefinition def | - node.asVariable().getDefinition() = def - or - node.asCfgNode() = def.getDefiningNode() - | - def.getScope() = Value::named("tarfile.open").(CallableValue).getScope() - or - def.isSelf() and def.getScope().getEnclosingModule().getName() = "tarfile" - ) - } -} - -from TarSlipConfiguration config, TaintedPathSource src, TaintedPathSink sink -where config.hasFlowPath(src, sink) -select sink.getSink(), src, sink, "Extraction of tarfile from $@", src.getSource(), +from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink +where config.hasFlowPath(source, sink) +select sink.getNode(), source, sink, "Extraction of tarfile from $@", source.getNode(), "a potentially untrusted source" diff --git a/python/ql/test/query-tests/Security/CWE-022-TarSlip/options b/python/ql/test/query-tests/Security/CWE-022-TarSlip/options deleted file mode 100644 index 492768b3481..00000000000 --- a/python/ql/test/query-tests/Security/CWE-022-TarSlip/options +++ /dev/null @@ -1 +0,0 @@ -semmle-extractor-options: -p ../lib/ --max-import-depth=3