Files
codeql/python/ql/src/Security/CWE-798/HardcodedCredentials.ql
yoff 408ba6218f Python: switch dataflow library to new (shared) CFG + SSA
Flips the Python dataflow trunk from the legacy CFG (semmle/python/Flow.qll)
and legacy ESSA SSA (semmle/python/essa/*) to the new shared CFG facade
(semmle.python.controlflow.internal.Cfg) and the new SSA adapter
(semmle.python.dataflow.new.internal.SsaImpl), both introduced
additively in the preceding PRs in this stack.

This is the trunk-flip equivalent of the original draft PR #21894 (kept
around as documentation), rebased on top of the four preparatory PRs:

  P1: Remove AstNode.getAFlowNode() and rewrite callers (#21919).
  P2: Qualify Flow.qll's AST references with Py:: prefix (#21920).
  P3: Add new shared-CFG-backed control flow graph (#21921).
  P4: Add new shared-SSA-backed SSA adapter (#21923).

The Python dataflow library (semmle/python/dataflow/new/) now imports
the new CFG facade and SSA adapter. All CFG-typed predicates
(ControlFlowNode, CallNode, BasicBlock, NameNode, AttrNode, ...) are
qualified with the Cfg:: prefix; SSA references switch from
EssaVariable/EssaDefinition to SsaImpl::Definition/SourceVariable.

GuardNode is redesigned to use the new CFG's outcome-node model
(isAfterTrue / isAfterFalse) instead of the legacy ConditionBlock +
flipped indirection. Only BarrierGuard<...> is preserved as public
API.

Framework files (Bottle, FastApi, Django, Tornado, Pyramid, Stdlib,
...) are updated to take CFG nodes from the new facade.

A handful of dataflow consistency tweaks for the new CFG:
- Augmented-assignment targets are treated as both load and store.
- 'from X import *' produces uncertain SSA writes for unknown names.
- CFG nodes are canonicalised so dataflow does not see equivalent
  pre/post-order pairs as distinct nodes.

Two AST tweaks for the new CFG:
- AstNodeImpl: omit PEP 695 type-parameter names from
  FunctionDefExpr / ClassDefExpr children.
- ImportResolution: drop the legacy essa import.

Test churn (~175 files): reblessed library- and query-test .expected
files reflect slightly different CFG granularity, different toString
output, and a handful of true alert deltas in security queries.

Verification: all 367 lib + src + consistency-queries compile clean.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-06-22 13:46:43 +00:00

148 lines
4.8 KiB
Plaintext

/**
* @name Hard-coded credentials
* @description Credentials are hard coded in the source code of the application.
* @kind path-problem
* @problem.severity error
* @security-severity 9.8
* @precision low
* @id py/hardcoded-credentials
* @tags security
* external/cwe/cwe-259
* external/cwe/cwe-321
* external/cwe/cwe-798
*/
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.filters.Tests
private import semmle.python.dataflow.new.internal.DataFlowDispatch as DataFlowDispatch
private import semmle.python.dataflow.new.internal.Builtins::Builtins as Builtins
private import semmle.python.frameworks.data.ModelsAsData
private import semmle.python.controlflow.internal.Cfg as Cfg
bindingset[char, fraction]
predicate fewer_characters_than(StringLiteral str, string char, float fraction) {
exists(string text, int chars |
text = str.getText() and
chars = count(int i | text.charAt(i) = char)
|
/* Allow one character */
chars = 1 or
chars < text.length() * fraction
)
}
predicate possible_reflective_name(string name) {
any(Function f).getName() = name
or
any(Class c).getName() = name
or
any(Module m).getName() = name
or
exists(Builtins::likelyBuiltin(name))
}
int char_count(StringLiteral str) { result = count(string c | c = str.getText().charAt(_)) }
predicate capitalized_word(StringLiteral str) { str.getText().regexpMatch("[A-Z][a-z]+") }
predicate format_string(StringLiteral str) { str.getText().matches("%{%}%") }
predicate maybeCredential(Cfg::ControlFlowNode f) {
/* A string that is not too short and unlikely to be text or an identifier. */
exists(StringLiteral str | str = f.getNode() |
/* At least 10 characters */
str.getText().length() > 9 and
/* Not too much whitespace */
fewer_characters_than(str, " ", 0.05) and
/* or underscores */
fewer_characters_than(str, "_", 0.2) and
/* Not too repetitive */
exists(int chars | chars = char_count(str) |
chars > 15 or
chars * 3 > str.getText().length() * 2
) and
not possible_reflective_name(str.getText()) and
not capitalized_word(str) and
not format_string(str)
)
or
/* Or, an integer with over 32 bits */
exists(IntegerLiteral lit | f.getNode() = lit |
not exists(lit.getValue()) and
/* Not a set of flags or round number */
not lit.getN().matches("%00%")
)
}
class HardcodedValueSource extends DataFlow::Node {
HardcodedValueSource() { maybeCredential(this.asCfgNode()) }
}
class CredentialSink extends DataFlow::Node {
CredentialSink() {
exists(string s | s.matches("credentials-%") |
// Actual sink-type will be things like `credentials-password` or `credentials-username`
ModelOutput::sinkNode(this, s)
)
or
exists(string name |
name.regexpMatch(getACredentialRegex()) and
not name.matches("%file")
|
exists(DataFlowDispatch::ArgumentPosition pos | pos.isKeyword(name) |
this.(DataFlow::ArgumentNode).argumentOf(_, pos)
)
or
exists(Keyword k | k.getArg() = name and this.asCfgNode().getNode() = k.getValue())
or
exists(Cfg::CompareNode cmp, Cfg::NameNode n | n.getId() = name |
cmp.operands(this.asCfgNode(), any(Eq eq), n)
or
cmp.operands(n, any(Eq eq), this.asCfgNode())
)
)
}
}
class CredentialSanitizer extends DataFlow::Node {
CredentialSanitizer() {
exists(string s | s.matches("credentials-%") |
// Whatever the string, this will sanitize flow to all credential sinks.
ModelOutput::barrierNode(this, s)
)
}
}
/**
* Gets a regular expression for matching names of locations (variables, parameters, keys) that
* indicate the value being held is a credential.
*/
private string getACredentialRegex() {
result = "(?i).*pass(wd|word|code|phrase)(?!.*question).*" or
result = "(?i).*(puid|username|userid).*" or
result = "(?i).*(cert)(?!.*(format|name)).*"
}
private module HardcodedCredentialsConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof HardcodedValueSource }
predicate isSink(DataFlow::Node sink) { sink instanceof CredentialSink }
predicate isBarrier(DataFlow::Node node) { node instanceof CredentialSanitizer }
predicate observeDiffInformedIncrementalMode() { any() }
}
module HardcodedCredentialsFlow = TaintTracking::Global<HardcodedCredentialsConfig>;
import HardcodedCredentialsFlow::PathGraph
from HardcodedCredentialsFlow::PathNode src, HardcodedCredentialsFlow::PathNode sink
where
HardcodedCredentialsFlow::flowPath(src, sink) and
not any(TestScope test).contains(src.getNode().asCfgNode().getNode())
select src.getNode(), src, sink, "This hardcoded value is $@.", sink.getNode(),
"used as credentials"