Crypto: Advanced literal filtering for OpenSSL, used for both unknown and known algorithm literals to improve dataflow performance.

This commit is contained in:
REDMOND\brodes
2025-05-21 18:15:44 -04:00
parent 9637aeca5e
commit a36fd2cb31
4 changed files with 134 additions and 35 deletions

View File

@@ -1,6 +1,7 @@
private import cpp as Language
import semmle.code.cpp.dataflow.new.TaintTracking
import codeql.quantum.experimental.Model
private import experimental.quantum.OpenSSL.AlgorithmCandidateLiteral
module CryptoInput implements InputSig<Language::Location> {
class DataFlowNode = DataFlow::Node;
@@ -89,13 +90,7 @@ module GenericDataSourceFlowConfig implements DataFlow::ConfigSig {
module GenericDataSourceFlow = TaintTracking::Global<GenericDataSourceFlowConfig>;
private class ConstantDataSource extends Crypto::GenericConstantSourceInstance instanceof Literal {
ConstantDataSource() {
// TODO: this is an API specific workaround for OpenSSL, as 'EC' is a constant that may be used
// where typical algorithms are specified, but EC specifically means set up a
// default curve container, that will later be specified explicitly (or if not a default)
// curve is used.
this.getValue() != "EC"
}
ConstantDataSource() { this instanceof OpenSSLAlgorithmCandidateLiteral }
override DataFlow::Node getOutputNode() { result.asExpr() = this }

View File

@@ -0,0 +1,116 @@
import cpp
private import semmle.code.cpp.models.Models
private import semmle.code.cpp.models.interfaces.FormattingFunction
/**
* Holds if a StringLiteral could be an algorithm literal.
* Note: this predicate should only consider restrictions with respect to strings only.
* General restrictions are in the OpenSSLAlgorithmCandidateLiteral class.
*/
private predicate isOpenSSLStringLiteralAlgorithmCandidate(StringLiteral s) {
// 'EC' is a constant that may be used where typical algorithms are specified,
// but EC specifically means set up a default curve container, that will later be
//specified explicitly (or if not a default) curve is used.
s.getValue() != "EC" and
// Ignore empty strings
s.getValue() != "" and
// ignore strings that represent integers, it is possible this could be used for actual
// algorithms but assuming this is not the common case
// NOTE: if we were to revert this restriction, we should still consider filtering "0"
// to be consistent with filtering integer 0
not exists(s.getValue().toInt()) and
// Filter out strings with "%", to filter out format strings
not s.getValue().matches("%\\%%") and
// Filter out strings in brackets or braces
not s.getValue().matches(["[%]", "(%)"]) and
// Filter out all strings of length 1, since these are not algorithm names
s.getValue().length() > 1 and
// Ignore all strings that are in format string calls outputing to a stream (e.g., stdout)
not exists(FormattingFunctionCall f |
exists(f.getOutputArgument(true)) and s = f.(Call).getAnArgument()
) and
// Ignore all format string calls where there is no known out param (resulting string)
// i.e., ignore printf, since it will just ouput a string and not produce a new string
not exists(FormattingFunctionCall f |
// Note: using two ways of determining if there is an out param, since I'm not sure
// which way is canonical
not exists(f.getOutputArgument(false)) and
not f.getTarget().(FormattingFunction).hasTaintFlow(_, _) and
f.(Call).getAnArgument() = s
)
}
/**
* Holds if an IntLiteral could be an algorithm literal.
* Note: this predicate should only consider restrictions with respect to integers only.
* General restrictions are in the OpenSSLAlgorithmCandidateLiteral class.
*/
private predicate isOpenSSLIntLiteralAlgorithmCandidate(Literal l) {
exists(l.getValue().toInt()) and
// Ignore char literals
not l instanceof CharLiteral and
not l instanceof StringLiteral and
// Ignore integer values of 0, commonly referring to NULL only (no known algorithm 0)
// Also ignore integer values greater than 5000
l.getValue().toInt() != 0 and
// ASSUMPTION, no negative numbers are allowed
// RATIONALE: this is a performance improvement to avoid having to trace every number
not exists(UnaryMinusExpr u | u.getOperand() = l) and
// OPENSSL has a special macro for getting every line, ignore it
not exists(MacroInvocation mi | mi.getExpr() = l and mi.getMacroName() = "OPENSSL_LINE") and
// Filter out cases where an int is returned into a pointer, e.g., return NULL;
not exists(ReturnStmt r |
r.getExpr() = l and
r.getEnclosingFunction().getType().getUnspecifiedType() instanceof DerivedType
) and
// A literal as an array index should never be an algorithm
not exists(ArrayExpr op | op.getArrayOffset() = l) and
// A literal used in a bitwise operation may be an algorithm, but not a candidate
// for the purposes of finding applied algorithms
not exists(BinaryBitwiseOperation op | op.getAnOperand() = l) and
not exists(AssignBitwiseOperation op | op.getAnOperand() = l) and
//Filter out cases where an int is assigned or initialized into a pointer, e.g., char* x = NULL;
not exists(Assignment a |
a.getRValue() = l and
a.getLValue().getType().getUnspecifiedType() instanceof DerivedType
) and
not exists(Initializer i |
i.getExpr() = l and
i.getDeclaration().getADeclarationEntry().getUnspecifiedType() instanceof DerivedType
) and
// Filter out cases where the literal is used in any kind of arithmetic operation
not exists(BinaryArithmeticOperation op | op.getAnOperand() = l) and
not exists(UnaryArithmeticOperation op | op.getOperand() = l) and
not exists(AssignArithmeticOperation op | op.getAnOperand() = l)
}
/**
* Any literal that may represent an algorithm for use in an operation, even if an invalid or unknown algorithm.
* The set of all literals is restricted by this class to cases where there is higher
* plausibility that the literal is eventually used as an algorithm.
* Literals are filtered, for example if they are used in a way no indicative of an algorithm use
* such as in an array index, bitwise operation, or logical operation.
* Note a case like this:
* if(algVal == "AES")
*
* "AES" may be a legitimate algorithm literal, but the literal will not be used for an operation directly
* since it is in a equality comparison, hence this case would also be filtered.
*/
class OpenSSLAlgorithmCandidateLiteral extends Literal {
OpenSSLAlgorithmCandidateLiteral() {
(
isOpenSSLIntLiteralAlgorithmCandidate(this) or
isOpenSSLStringLiteralAlgorithmCandidate(this)
) and
// ********* General filters beyond what is filtered for strings and ints *********
// An algorithm literal in a switch case will not be directly applied to an operation.
not exists(SwitchCase sc | sc.getExpr() = this) and
// A literal in a logical operation may be an algorithm, but not a candidate
// for the purposes of finding applied algorithms
not exists(BinaryLogicalOperation op | op.getAnOperand() = this) and
not exists(UnaryLogicalOperation op | op.getOperand() = this) and
// A literal in a comparison operation may be an algorithm, but not a candidate
// for the purposes of finding applied algorithms
not exists(ComparisonOperation op | op.getAnOperand() = this)
}
}

View File

@@ -1,5 +1,5 @@
import cpp
private import experimental.quantum.OpenSSL.LibraryDetector
import experimental.quantum.OpenSSL.AlgorithmCandidateLiteral
predicate resolveAlgorithmFromExpr(Expr e, string normalizedName, string algType) {
resolveAlgorithmFromCall(e, normalizedName, algType)
@@ -89,7 +89,6 @@ class KnownOpenSSLEllipticCurveAlgorithmConstant extends KnownOpenSSLAlgorithmCo
* alias = "dss1" and target = "dsaWithSHA1"
*/
predicate resolveAlgorithmFromCall(Call c, string normalized, string algType) {
isPossibleOpenSSLFunction(c.getTarget()) and
exists(string name, string parsedTargetName |
parsedTargetName =
c.getTarget().getName().replaceAll("EVP_", "").toLowerCase().replaceAll("_", "-") and
@@ -103,7 +102,9 @@ predicate resolveAlgorithmFromCall(Call c, string normalized, string algType) {
* if `e` resolves to a known algorithm.
* If this predicate does not hold, then `e` can be interpreted as being of `UNKNOWN` type.
*/
predicate resolveAlgorithmFromLiteral(Literal e, string normalized, string algType) {
predicate resolveAlgorithmFromLiteral(
OpenSSLAlgorithmCandidateLiteral e, string normalized, string algType
) {
exists(int nid |
nid = getPossibleNidFromLiteral(e) and knownOpenSSLAlgorithmLiteral(_, nid, normalized, algType)
)
@@ -125,28 +126,15 @@ string resolveAlgorithmAlias(string name) {
)
}
private int getPossibleNidFromLiteral(Literal e) {
/**
* Determines if an int literal (NID) is a candidate for being an algorithm literal.
* Checks for common cases where literals are used that would not be indicative of an algorithm.
* Returns the int literal value if the literal is a candidate for an algorithm.
*/
private int getPossibleNidFromLiteral(OpenSSLAlgorithmCandidateLiteral e) {
result = e.getValue().toInt() and
not e instanceof CharLiteral and
not e instanceof StringLiteral and
// ASSUMPTION, no negative numbers are allowed
// RATIONALE: this is a performance improvement to avoid having to trace every number
not exists(UnaryMinusExpr u | u.getOperand() = e) and
// OPENSSL has a special macro for getting every line, ignore it
not exists(MacroInvocation mi | mi.getExpr() = e and mi.getMacroName() = "OPENSSL_LINE") and
// Filter out cases where an int is assigned into a pointer, e.g., char* x = NULL;
not exists(Assignment a |
a.getRValue() = e and a.getLValue().getType().getUnspecifiedType() instanceof PointerType
) and
not exists(Initializer i |
i.getExpr() = e and
i.getDeclaration().getADeclarationEntry().getUnspecifiedType() instanceof PointerType
) and
// Filter out cases where an int is returned into a pointer, e.g., return NULL;
not exists(ReturnStmt r |
r.getExpr() = e and
r.getEnclosingFunction().getType().getUnspecifiedType() instanceof PointerType
)
not e instanceof StringLiteral
}
string getAlgorithmAlias(string alias) {

View File

@@ -2,9 +2,9 @@ import cpp
import semmle.code.cpp.dataflow.new.DataFlow
module OpenSSLModel {
import experimental.quantum.Language
import experimental.quantum.OpenSSL.AlgorithmInstances.OpenSSLAlgorithmInstances
import experimental.quantum.OpenSSL.AlgorithmValueConsumers.OpenSSLAlgorithmValueConsumers
import experimental.quantum.OpenSSL.Operations.OpenSSLOperations
import experimental.quantum.OpenSSL.Random
import AlgorithmInstances.OpenSSLAlgorithmInstances
import AlgorithmValueConsumers.OpenSSLAlgorithmValueConsumers
import Operations.OpenSSLOperations
import Random
import AlgorithmCandidateLiteral
}