mirror of
https://github.com/github/codeql.git
synced 2026-04-29 02:35:15 +02:00
Merge pull request #6184 from github/rdmarsh2/improve-exec-tainted
C++: Refactor ExecTainted.ql to only report results after string concatenation
This commit is contained in:
@@ -9,7 +9,7 @@ int main(int argc, char** argv) {
|
||||
system(command1);
|
||||
}
|
||||
|
||||
{
|
||||
{
|
||||
// GOOD: the user string is encoded by a library routine.
|
||||
char userNameQuoted[1000] = {0};
|
||||
encodeShellString(userNameQuoted, 1000, userName);
|
||||
|
||||
@@ -3,10 +3,10 @@
|
||||
* @description Using user-supplied data in an OS command, without
|
||||
* neutralizing special elements, can make code vulnerable
|
||||
* to command injection.
|
||||
* @kind problem
|
||||
* @kind path-problem
|
||||
* @problem.severity error
|
||||
* @security-severity 9.8
|
||||
* @precision low
|
||||
* @precision high
|
||||
* @id cpp/command-line-injection
|
||||
* @tags security
|
||||
* external/cwe/cwe-078
|
||||
@@ -16,13 +16,204 @@
|
||||
import cpp
|
||||
import semmle.code.cpp.security.CommandExecution
|
||||
import semmle.code.cpp.security.Security
|
||||
import semmle.code.cpp.security.TaintTracking
|
||||
import semmle.code.cpp.valuenumbering.GlobalValueNumbering
|
||||
import semmle.code.cpp.ir.IR
|
||||
import semmle.code.cpp.ir.dataflow.TaintTracking
|
||||
import semmle.code.cpp.ir.dataflow.TaintTracking2
|
||||
import semmle.code.cpp.security.FlowSources
|
||||
import semmle.code.cpp.models.implementations.Strcat
|
||||
|
||||
from Expr taintedArg, Expr taintSource, string taintCause, string callChain
|
||||
Expr sinkAsArgumentIndirection(DataFlow::Node sink) {
|
||||
result =
|
||||
sink.asOperand()
|
||||
.(SideEffectOperand)
|
||||
.getAddressOperand()
|
||||
.getAnyDef()
|
||||
.getUnconvertedResultExpression()
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds if `fst` is a string that is used in a format or concatenation function resulting in `snd`,
|
||||
* and is *not* placed at the start of the resulting string. This indicates that the author did not
|
||||
* expect `fst` to control what program is run if the resulting string is eventually interpreted as
|
||||
* a command line, for example as an argument to `system`.
|
||||
*/
|
||||
predicate interestingConcatenation(DataFlow::Node fst, DataFlow::Node snd) {
|
||||
exists(FormattingFunctionCall call, int index, FormatLiteral literal |
|
||||
sinkAsArgumentIndirection(fst) = call.getConversionArgument(index) and
|
||||
snd.asDefiningArgument() = call.getOutputArgument(false) and
|
||||
literal = call.getFormat() and
|
||||
not literal.getConvSpecOffset(index) = 0 and
|
||||
literal.getConversionChar(index) = ["s", "S"]
|
||||
)
|
||||
or
|
||||
// strcat and friends
|
||||
exists(StrcatFunction strcatFunc, CallInstruction call, ReadSideEffectInstruction rse |
|
||||
call.getStaticCallTarget() = strcatFunc and
|
||||
rse.getArgumentDef() = call.getArgument(strcatFunc.getParamSrc()) and
|
||||
fst.asOperand() = rse.getSideEffectOperand() and
|
||||
snd.asInstruction().(WriteSideEffectInstruction).getDestinationAddress() =
|
||||
call.getArgument(strcatFunc.getParamDest())
|
||||
)
|
||||
or
|
||||
exists(CallInstruction call, Operator op, ReadSideEffectInstruction rse |
|
||||
call.getStaticCallTarget() = op and
|
||||
op.hasQualifiedName("std", "operator+") and
|
||||
op.getType().(UserType).hasQualifiedName("std", "basic_string") and
|
||||
call.getArgument(1) = rse.getArgumentOperand().getAnyDef() and // left operand
|
||||
fst.asOperand() = rse.getSideEffectOperand() and
|
||||
call = snd.asInstruction()
|
||||
)
|
||||
}
|
||||
|
||||
class TaintToConcatenationConfiguration extends TaintTracking::Configuration {
|
||||
TaintToConcatenationConfiguration() { this = "TaintToConcatenationConfiguration" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) { source instanceof FlowSource }
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) { interestingConcatenation(sink, _) }
|
||||
|
||||
override predicate isSanitizer(DataFlow::Node node) {
|
||||
node.asInstruction().getResultType() instanceof IntegralType
|
||||
or
|
||||
node.asInstruction().getResultType() instanceof FloatingPointType
|
||||
}
|
||||
}
|
||||
|
||||
class ExecTaintConfiguration extends TaintTracking2::Configuration {
|
||||
ExecTaintConfiguration() { this = "ExecTaintConfiguration" }
|
||||
|
||||
override predicate isSource(DataFlow::Node source) {
|
||||
exists(DataFlow::Node prevSink, TaintToConcatenationConfiguration conf |
|
||||
conf.hasFlow(_, prevSink) and
|
||||
interestingConcatenation(prevSink, source)
|
||||
)
|
||||
}
|
||||
|
||||
override predicate isSink(DataFlow::Node sink) {
|
||||
shellCommand(sinkAsArgumentIndirection(sink), _)
|
||||
}
|
||||
|
||||
override predicate isSanitizerOut(DataFlow::Node node) {
|
||||
isSink(node) // Prevent duplicates along a call chain, since `shellCommand` will include wrappers
|
||||
}
|
||||
}
|
||||
|
||||
module StitchedPathGraph {
|
||||
// There's a different PathNode class for each DataFlowImplN.qll, so we can't simply combine the
|
||||
// PathGraph predicates directly. Instead, we use a newtype so there's a single type that
|
||||
// contains both sets of PathNodes.
|
||||
newtype TMergedPathNode =
|
||||
TPathNode1(DataFlow::PathNode node) or
|
||||
TPathNode2(DataFlow2::PathNode node)
|
||||
|
||||
// this wraps the toString and location predicates so we can use the merged node type in a
|
||||
// selection
|
||||
class MergedPathNode extends TMergedPathNode {
|
||||
string toString() {
|
||||
exists(DataFlow::PathNode n |
|
||||
this = TPathNode1(n) and
|
||||
result = n.toString()
|
||||
)
|
||||
or
|
||||
exists(DataFlow2::PathNode n |
|
||||
this = TPathNode2(n) and
|
||||
result = n.toString()
|
||||
)
|
||||
}
|
||||
|
||||
DataFlow::Node getNode() {
|
||||
exists(DataFlow::PathNode n |
|
||||
this = TPathNode1(n) and
|
||||
result = n.getNode()
|
||||
)
|
||||
or
|
||||
exists(DataFlow2::PathNode n |
|
||||
this = TPathNode2(n) and
|
||||
result = n.getNode()
|
||||
)
|
||||
}
|
||||
|
||||
DataFlow::PathNode getPathNode1() { this = TPathNode1(result) }
|
||||
|
||||
DataFlow2::PathNode getPathNode2() { this = TPathNode2(result) }
|
||||
|
||||
predicate hasLocationInfo(
|
||||
string filepath, int startline, int startcolumn, int endline, int endcolumn
|
||||
) {
|
||||
exists(DataFlow::PathNode n |
|
||||
this = TPathNode1(n) and
|
||||
n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
|
||||
)
|
||||
or
|
||||
exists(DataFlow2::PathNode n |
|
||||
this = TPathNode2(n) and
|
||||
n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
query predicate edges(MergedPathNode a, MergedPathNode b) {
|
||||
exists(DataFlow::PathNode an, DataFlow::PathNode bn |
|
||||
a = TPathNode1(an) and
|
||||
b = TPathNode1(bn) and
|
||||
DataFlow::PathGraph::edges(an, bn)
|
||||
)
|
||||
or
|
||||
exists(DataFlow2::PathNode an, DataFlow2::PathNode bn |
|
||||
a = TPathNode2(an) and
|
||||
b = TPathNode2(bn) and
|
||||
DataFlow2::PathGraph::edges(an, bn)
|
||||
)
|
||||
or
|
||||
// This is where paths from the two configurations are connected. `interestingConcatenation`
|
||||
// is the only thing in this module that's actually specific to the query - everything else is
|
||||
// just using types and predicates from the DataFlow library.
|
||||
interestingConcatenation(a.getNode(), b.getNode()) and
|
||||
a instanceof TPathNode1 and
|
||||
b instanceof TPathNode2
|
||||
}
|
||||
|
||||
query predicate nodes(MergedPathNode mpn, string key, string val) {
|
||||
// here we just need the union of the underlying `nodes` predicates
|
||||
exists(DataFlow::PathNode n |
|
||||
mpn = TPathNode1(n) and
|
||||
DataFlow::PathGraph::nodes(n, key, val)
|
||||
)
|
||||
or
|
||||
exists(DataFlow2::PathNode n |
|
||||
mpn = TPathNode2(n) and
|
||||
DataFlow2::PathGraph::nodes(n, key, val)
|
||||
)
|
||||
}
|
||||
|
||||
query predicate subpaths(
|
||||
MergedPathNode arg, MergedPathNode par, MergedPathNode ret, MergedPathNode out
|
||||
) {
|
||||
// just forward subpaths from the underlying libraries. This might be slightly awkward when
|
||||
// the concatenation is deep in a call chain.
|
||||
DataFlow::PathGraph::subpaths(arg.getPathNode1(), par.getPathNode1(), ret.getPathNode1(),
|
||||
out.getPathNode1())
|
||||
or
|
||||
DataFlow2::PathGraph::subpaths(arg.getPathNode2(), par.getPathNode2(), ret.getPathNode2(),
|
||||
out.getPathNode2())
|
||||
}
|
||||
}
|
||||
|
||||
import StitchedPathGraph
|
||||
|
||||
from
|
||||
DataFlow::PathNode sourceNode, DataFlow::PathNode concatSink, DataFlow2::PathNode concatSource,
|
||||
DataFlow2::PathNode sinkNode, string taintCause, string callChain,
|
||||
TaintToConcatenationConfiguration conf1, ExecTaintConfiguration conf2
|
||||
where
|
||||
shellCommand(taintedArg, callChain) and
|
||||
tainted(taintSource, taintedArg) and
|
||||
isUserInput(taintSource, taintCause)
|
||||
select taintedArg,
|
||||
"This argument to an OS command is derived from $@ and then passed to " + callChain, taintSource,
|
||||
"user input (" + taintCause + ")"
|
||||
taintCause = sourceNode.getNode().(FlowSource).getSourceType() and
|
||||
conf1.hasFlowPath(sourceNode, concatSink) and
|
||||
interestingConcatenation(concatSink.getNode(), concatSource.getNode()) and // this loses call context
|
||||
conf2.hasFlowPath(concatSource, sinkNode) and
|
||||
shellCommand(sinkAsArgumentIndirection(sinkNode.getNode()), callChain)
|
||||
select sinkAsArgumentIndirection(sinkNode.getNode()), TPathNode1(sourceNode).(MergedPathNode),
|
||||
TPathNode2(sinkNode).(MergedPathNode),
|
||||
"This argument to an OS command is derived from $@, dangerously concatenated into $@, and then passed to "
|
||||
+ callChain, sourceNode, "user input (" + taintCause + ")", concatSource,
|
||||
concatSource.toString()
|
||||
|
||||
Reference in New Issue
Block a user