Merge branch 'main' into main

This commit is contained in:
Mathew Payne
2023-04-17 15:10:32 +01:00
committed by GitHub
33 changed files with 806 additions and 176 deletions

View File

@@ -16,160 +16,133 @@
import cpp
import semmle.code.cpp.commons.Scanf
import semmle.code.cpp.controlflow.Guards
import semmle.code.cpp.ir.dataflow.DataFlow
import semmle.code.cpp.dataflow.new.DataFlow::DataFlow
import semmle.code.cpp.ir.IR
import semmle.code.cpp.ir.ValueNumbering
/**
* Holds if `call` is a `scanf`-like function that may write to `output` at index `index`.
*
* Furthermore, `instr` is the instruction that defines the address of the `index`'th argument
* of `call`, and `vn` is the value number of `instr.`
*/
predicate isSource(ScanfFunctionCall call, int index, Instruction instr, ValueNumber vn, Expr output) {
output = call.getOutputArgument(index).getFullyConverted() and
instr.getConvertedResultExpression() = output and
vn.getAnInstruction() = instr
/** Holds if `n` reaches an argument to a call to a `scanf`-like function. */
pragma[nomagic]
predicate revFlow0(Node n) {
isSink(_, _, n, _)
or
exists(Node succ | revFlow0(succ) | localFlowStep(n, succ))
}
/**
* Holds if `instr` is control-flow reachable in 0 or more steps from
* a call to a `scanf`-like function.
* Holds if `n` represents an uninitialized stack-allocated variable, or a
* newly (and presumed uninitialized) heap allocation.
*/
predicate isUninitialized(Node n) {
exists(n.asUninitialized()) or
n.asIndirectExpr(1) instanceof AllocationExpr
}
pragma[nomagic]
predicate fwdFlow0(Instruction instr) {
isSource(_, _, instr, _, _)
or
exists(Instruction prev |
fwdFlow0(prev) and
prev.getASuccessor() = instr
predicate fwdFlow0(Node n) {
revFlow0(n) and
(
isUninitialized(n)
or
exists(Node prev |
fwdFlow0(prev) and
localFlowStep(prev, n)
)
)
}
/**
* Holds if `instr` is part of the IR translation of `access` that
* is not an expression being deallocated, and `instr` has value
* number `vn`.
*/
predicate isSink(Instruction instr, Access access, ValueNumber vn) {
instr.getAst() = access and
not any(DeallocationExpr dealloc).getFreedExpr() = access and
vn.getAnInstruction() = instr
predicate isSink(ScanfFunctionCall call, int index, Node n, Expr input) {
input = call.getOutputArgument(index) and
n.asIndirectExpr() = input
}
/**
* Holds if `instr` is part of a path from a call to a `scanf`-like function
* Holds if `call` is a `scanf`-like call and `output` is the `index`'th
* argument that has not been previously initialized.
*/
predicate isRelevantScanfCall(ScanfFunctionCall call, int index, Expr output) {
exists(Node n | fwdFlow0(n) and isSink(call, index, n, output))
}
/**
* Holds if `call` is a `scanf`-like function that may write to `output` at
* index `index` and `n` is the dataflow node that represents the data after
* it has been written to by `call`.
*/
predicate isSource(ScanfFunctionCall call, int index, Node n, Expr output) {
isRelevantScanfCall(call, index, output) and
output = call.getOutputArgument(index) and
n.asDefiningArgument() = output
}
/**
* Holds if `n` is reachable from an output argument of a relevant call to
* a `scanf`-like function.
*/
pragma[nomagic]
predicate fwdFlow(Node n) {
isSource(_, _, n, _)
or
exists(Node prev |
fwdFlow(prev) and
localFlowStep(prev, n) and
not isSanitizerOut(prev)
)
}
/** Holds if `n` should not have outgoing flow. */
predicate isSanitizerOut(Node n) {
// We disable flow out of sinks to reduce result duplication
isSink(n, _)
or
// If the node is being passed to a function it may be
// modified, and thus it's safe to later read the value.
exists(n.asIndirectArgument())
}
/**
* Holds if `n` is a node such that `n.asExpr() = e` and `e` is not an
* argument of a deallocation expression.
*/
predicate isSink(Node n, Expr e) {
n.asExpr() = e and
not any(DeallocationExpr dealloc).getFreedExpr() = e
}
/**
* Holds if `n` is part of a path from a call to a `scanf`-like function
* to a use of the written variable.
*/
pragma[nomagic]
predicate revFlow0(Instruction instr) {
fwdFlow0(instr) and
predicate revFlow(Node n) {
fwdFlow(n) and
(
isSink(instr, _, _)
isSink(n, _)
or
exists(Instruction succ | revFlow0(succ) | instr.getASuccessor() = succ)
)
}
/**
* Holds if `instr` is part of a path from a call to a `scanf`-like function
* that writes to a variable with value number `vn`, without passing through
* redefinitions of the variable.
*/
pragma[nomagic]
private predicate fwdFlow(Instruction instr, ValueNumber vn) {
revFlow0(instr) and
(
isSource(_, _, instr, vn, _)
or
exists(Instruction prev |
fwdFlow(prev, vn) and
prev.getASuccessor() = instr and
not isBarrier(instr, vn)
exists(Node succ |
revFlow(succ) and
localFlowStep(n, succ) and
not isSanitizerOut(n)
)
)
}
/**
* Holds if `instr` is part of a path from a call to a `scanf`-like function
* that writes to a variable with value number `vn`, without passing through
* redefinitions of the variable.
*
* Note: This predicate only holds for the `(intr, vn)` pairs that are also
* control-flow reachable from an argument to a `scanf`-like function call.
*/
pragma[nomagic]
predicate revFlow(Instruction instr, ValueNumber vn) {
fwdFlow(instr, pragma[only_bind_out](vn)) and
(
isSink(instr, _, vn)
or
exists(Instruction succ | revFlow(succ, vn) |
instr.getASuccessor() = succ and
not isBarrier(succ, vn)
)
)
/** A local flow step, restricted to relevant dataflow nodes. */
private predicate step(Node n1, Node n2) {
revFlow(n1) and
revFlow(n2) and
localFlowStep(n1, n2)
}
/**
* A type that bundles together a reachable instruction with the appropriate
* value number (i.e., the value number that's transferred from the source
* to the sink).
*/
newtype TNode = MkNode(Instruction instr, ValueNumber vn) { revFlow(instr, vn) }
class Node extends MkNode {
ValueNumber vn;
Instruction instr;
Node() { this = MkNode(instr, vn) }
final string toString() { result = instr.toString() }
final Node getASuccessor() { result = MkNode(pragma[only_bind_out](instr.getASuccessor()), vn) }
final Location getLocation() { result = instr.getLocation() }
}
/**
* Holds if `instr` is an instruction with value number `vn` that is
* used in a store operation, or is overwritten by another call to
* a `scanf`-like function.
*/
private predicate isBarrier(Instruction instr, ValueNumber vn) {
// We only need to compute barriers for instructions that we
// managed to hit during the initial flow stage.
revFlow0(pragma[only_bind_into](instr)) and
valueNumber(instr) = vn and
exists(Expr e | instr.getAst() = e |
instr = any(StoreInstruction s).getDestinationAddress()
or
isSource(_, _, _, _, [e, e.getParent().(AddressOfExpr)])
)
}
/** Holds if `n1` steps to `n2` in a single step. */
predicate isSuccessor(Node n1, Node n2) { n1.getASuccessor() = n2 }
predicate hasFlow(Node n1, Node n2) = fastTC(isSuccessor/2)(n1, n2)
Node getNode(Instruction instr, ValueNumber vn) { result = MkNode(instr, vn) }
predicate hasFlow(Node n1, Node n2) = fastTC(step/2)(n1, n2)
/**
* Holds if `source` is the `index`'th argument to the `scanf`-like call `call`, and `sink` is
* an instruction that is part of the translation of `access` which is a transitive
* control-flow successor of `call`.
*
* Furthermore, `source` and `sink` have identical global value numbers.
* a dataflow node that represents the expression `e`.
*/
predicate hasFlow(
Instruction source, ScanfFunctionCall call, int index, Instruction sink, Access access
) {
exists(ValueNumber vn |
isSource(call, index, source, vn, _) and
hasFlow(getNode(source, pragma[only_bind_into](vn)), getNode(sink, pragma[only_bind_into](vn))) and
isSink(sink, access, vn)
)
predicate hasFlow(Node source, ScanfFunctionCall call, int index, Node sink, Expr e) {
isSource(call, index, source, _) and
hasFlow(source, sink) and
isSink(sink, e)
}
/**
@@ -177,7 +150,7 @@ predicate hasFlow(
* success in writing the output argument at index `index`.
*/
int getMinimumGuardConstant(ScanfFunctionCall call, int index) {
isSource(call, index, _, _, _) and
isSource(call, index, _, _) and
result =
index + 1 -
count(ScanfFormatLiteral f, int n |
@@ -191,7 +164,7 @@ int getMinimumGuardConstant(ScanfFunctionCall call, int index) {
* Holds the access to `e` isn't guarded by a check that ensures that `call` returned
* at least `minGuard`.
*/
predicate hasNonGuardedAccess(ScanfFunctionCall call, Access e, int minGuard) {
predicate hasNonGuardedAccess(ScanfFunctionCall call, Expr e, int minGuard) {
exists(int index |
hasFlow(_, call, index, _, e) and
minGuard = getMinimumGuardConstant(call, index)
@@ -211,7 +184,7 @@ BasicBlock blockGuardedBy(int value, string op, ScanfFunctionCall call) {
exists(GuardCondition g, Expr left, Expr right |
right = g.getAChild() and
value = left.getValue().toInt() and
DataFlow::localExprFlow(call, right)
localExprFlow(call, right)
|
g.ensuresEq(left, right, 0, result, true) and op = "=="
or
@@ -221,9 +194,9 @@ BasicBlock blockGuardedBy(int value, string op, ScanfFunctionCall call) {
)
}
from ScanfFunctionCall call, Access access, int minGuard
where hasNonGuardedAccess(call, access, minGuard)
select access,
from ScanfFunctionCall call, Expr e, int minGuard
where hasNonGuardedAccess(call, e, minGuard)
select e,
"This variable is read, but may not have been written. " +
"It should be guarded by a check that the $@ returns at least " + minGuard + ".", call,
call.toString()

View File

@@ -1,9 +1,8 @@
| test.cpp:35:7:35:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:34:3:34:7 | call to scanf | call to scanf |
| test.cpp:51:7:51:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:50:3:50:7 | call to scanf | call to scanf |
| test.cpp:68:7:68:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:67:3:67:7 | call to scanf | call to scanf |
| test.cpp:80:7:80:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:79:3:79:7 | call to scanf | call to scanf |
| test.cpp:90:8:90:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:89:3:89:7 | call to scanf | call to scanf |
| test.cpp:98:8:98:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:97:3:97:7 | call to scanf | call to scanf |
| test.cpp:90:7:90:8 | * ... | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:89:3:89:7 | call to scanf | call to scanf |
| test.cpp:98:7:98:8 | * ... | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:97:3:97:7 | call to scanf | call to scanf |
| test.cpp:108:7:108:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:107:3:107:8 | call to fscanf | call to fscanf |
| test.cpp:115:7:115:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:114:3:114:8 | call to sscanf | call to sscanf |
| test.cpp:164:8:164:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:162:7:162:11 | call to scanf | call to scanf |
@@ -12,13 +11,9 @@
| test.cpp:224:8:224:8 | j | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:221:7:221:11 | call to scanf | call to scanf |
| test.cpp:248:9:248:9 | d | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:246:25:246:29 | call to scanf | call to scanf |
| test.cpp:252:9:252:9 | d | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:250:14:250:18 | call to scanf | call to scanf |
| test.cpp:264:7:264:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:263:3:263:7 | call to scanf | call to scanf |
| test.cpp:272:7:272:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:271:3:271:7 | call to scanf | call to scanf |
| test.cpp:280:7:280:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:279:3:279:7 | call to scanf | call to scanf |
| test.cpp:292:7:292:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:291:3:291:7 | call to scanf | call to scanf |
| test.cpp:302:8:302:12 | ptr_i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:301:3:301:7 | call to scanf | call to scanf |
| test.cpp:310:7:310:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:309:3:309:7 | call to scanf | call to scanf |
| test.cpp:404:25:404:25 | u | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:403:6:403:11 | call to sscanf | call to sscanf |
| test.cpp:416:7:416:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:413:7:413:11 | call to scanf | call to scanf |
| test.cpp:423:7:423:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:420:7:420:11 | call to scanf | call to scanf |
| test.cpp:430:6:430:6 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:429:2:429:6 | call to scanf | call to scanf |

View File

@@ -48,7 +48,7 @@ int main()
int i = 0;
scanf("%d", &i);
use(i); // BAD. Design choice: already initialized variables shouldn't make a difference.
use(i); // GOOD. Design choice: already initialized variables are fine.
}
{
@@ -261,7 +261,7 @@ int main()
i = 0;
scanf("%d", &i);
use(i); // BAD
use(i); // GOOD
}
{
@@ -269,7 +269,7 @@ int main()
set_by_ref(i);
scanf("%d", &i);
use(i); // BAD
use(i); // GOOD [FALSE POSITIVE]
}
{
@@ -277,7 +277,7 @@ int main()
set_by_ptr(&i);
scanf("%d", &i);
use(i); // BAD
use(i); // GOOD [FALSE POSITIVE]
}
{
@@ -299,7 +299,7 @@ int main()
int *ptr_i = &i;
scanf("%d", &i);
use(*ptr_i); // BAD: may not have written `i`
use(*ptr_i); // BAD [NOT DETECTED]: may not have written `i`
}
{
@@ -307,7 +307,7 @@ int main()
int *ptr_i = &i;
scanf("%d", ptr_i);
use(i); // BAD: may not have written `*ptr_i`
use(i); // BAD [NOT DETECTED]: may not have written `*ptr_i`
}
{
@@ -427,5 +427,5 @@ void scan_and_write() {
void scan_and_static_variable() {
static int i;
scanf("%d", &i);
use(i); // GOOD [FALSE POSITIVE]: static variables are always 0-initialized
use(i); // GOOD: static variables are always 0-initialized
}

View File

@@ -157,10 +157,12 @@ private class FunctionalExtendCallShallow extends ExtendCall {
}
/**
* A taint propagating data flow edge from the objects flowing into an extend call to its return value
* A value-preserving data flow edge from the objects flowing into an extend call to its return value
* and to the source of the destination object.
*
* Since all object properties are preserved, we model this as a value-preserving step.
*/
private class ExtendCallTaintStep extends TaintTracking::SharedTaintStep {
private class ExtendCallStep extends PreCallGraphStep {
override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
exists(ExtendCall extend |
pred = extend.getASourceOperand() and succ = extend.getDestinationOperand().getALocalSource()

View File

@@ -806,6 +806,10 @@ private predicate basicFlowStepNoBarrier(
callStep(pred, succ) and
summary = PathSummary::call()
or
// Implied receiver flow
CallGraph::impliedReceiverStep(pred, succ) and
summary = PathSummary::call()
or
// Flow out of function
returnStep(pred, succ) and
summary = PathSummary::return()

View File

@@ -241,22 +241,26 @@ module CallGraph {
)
}
private predicate shouldTrackObjectWithMethods(DataFlow::SourceNode node) {
private DataFlow::FunctionNode getAMethodOnPlainObject(DataFlow::SourceNode node) {
(
(
node instanceof DataFlow::ObjectLiteralNode
or
node instanceof DataFlow::FunctionNode
) and
node.getAPropertySource() instanceof DataFlow::FunctionNode
result = node.getAPropertySource()
or
exists(node.(DataFlow::ObjectLiteralNode).getPropertyGetter(_))
result = node.(DataFlow::ObjectLiteralNode).getPropertyGetter(_)
or
exists(node.(DataFlow::ObjectLiteralNode).getPropertySetter(_))
result = node.(DataFlow::ObjectLiteralNode).getPropertySetter(_)
) and
not node.getTopLevel().isExterns()
}
private predicate shouldTrackObjectWithMethods(DataFlow::SourceNode node) {
exists(getAMethodOnPlainObject(node))
}
/**
* Gets a step summary for tracking object literals.
*
@@ -273,4 +277,22 @@ module CallGraph {
or
StepSummary::step(getAnAllocationSiteRef(node), result, objectWithMethodsStep())
}
/**
* Holds if `pred` is assumed to flow to `succ` because a method is stored on an object that is assumed
* to be the receiver of calls to that method.
*
* For example, object literal below is assumed to flow to the receiver of the `foo` function:
* ```js
* let obj = {};
* obj.foo = function() {}
* ```
*/
cached
predicate impliedReceiverStep(DataFlow::SourceNode pred, DataFlow::SourceNode succ) {
exists(DataFlow::SourceNode host |
pred = getAnAllocationSiteRef(host) and
succ = getAMethodOnPlainObject(host).getReceiver()
)
}
}

View File

@@ -94,6 +94,10 @@ private module Cached {
DataFlow::localFieldStep(pred, succ) and
summary = LevelStep()
or
// Implied flow of host object into 'this' of a method
CallGraph::impliedReceiverStep(pred, succ) and
summary = CallStep()
or
exists(string prop |
basicStoreStep(pred, succ, prop) and
summary = StoreStep(prop)

View File

@@ -55,6 +55,22 @@ class Configuration extends TaintTracking::Configuration {
)
}
override predicate isSanitizerEdge(
DataFlow::Node pred, DataFlow::Node succ, DataFlow::FlowLabel lbl
) {
// Suppress the value-preserving step src -> dst in `extend(dst, src)`. This is modeled as a value-preserving
// step because it preserves all properties, but the destination is not actually Object.prototype.
exists(ExtendCall call |
pred = call.getASourceOperand() and
(
succ = call.getDestinationOperand().getALocalSource()
or
succ = call
) and
lbl instanceof ObjectPrototype
)
}
override predicate isAdditionalFlowStep(
DataFlow::Node pred, DataFlow::Node succ, DataFlow::FlowLabel inlbl, DataFlow::FlowLabel outlbl
) {

View File

@@ -31,6 +31,13 @@ module UnsafeJQueryPlugin {
*/
abstract class Sanitizer extends DataFlow::Node { }
/**
* The receiver of a function, seen as a sanitizer.
*
* Plugins often do `$(this)` to coerce an existing DOM element to a jQuery object.
*/
private class ThisSanitizer extends Sanitizer instanceof DataFlow::ThisNode { }
/**
* An argument that may act as an HTML fragment rather than a CSS selector, as a sink for remote unsafe jQuery plugins.
*/

View File

@@ -0,0 +1,5 @@
---
category: minorAnalysis
---
* Improved the call graph to better handle the case where a function is stored on
a plain object and subsequently copied to a new host object via an `extend` call.

View File

@@ -36,6 +36,8 @@ nodes
| tst-UntrustedDataToExternalAPI.js:33:14:33:22 | untrusted |
| tst-UntrustedDataToExternalAPI.js:34:34:34:42 | untrusted |
| tst-UntrustedDataToExternalAPI.js:34:34:34:42 | untrusted |
| tst-UntrustedDataToExternalAPI.js:41:7:41:8 | {} |
| tst-UntrustedDataToExternalAPI.js:41:7:41:8 | {} |
| tst-UntrustedDataToExternalAPI.js:41:11:45:1 | {\\n x ... usted\\n} |
| tst-UntrustedDataToExternalAPI.js:41:11:45:1 | {\\n x ... usted\\n} |
| tst-UntrustedDataToExternalAPI.js:42:8:42:16 | untrusted |
@@ -83,6 +85,8 @@ edges
| tst-UntrustedDataToExternalAPI.js:24:21:24:41 | JSON.pa ... rusted) | tst-UntrustedDataToExternalAPI.js:24:20:24:42 | [JSON.p ... usted)] |
| tst-UntrustedDataToExternalAPI.js:24:21:24:41 | JSON.pa ... rusted) | tst-UntrustedDataToExternalAPI.js:24:20:24:42 | [JSON.p ... usted)] |
| tst-UntrustedDataToExternalAPI.js:24:32:24:40 | untrusted | tst-UntrustedDataToExternalAPI.js:24:21:24:41 | JSON.pa ... rusted) |
| tst-UntrustedDataToExternalAPI.js:41:11:45:1 | {\\n x ... usted\\n} | tst-UntrustedDataToExternalAPI.js:41:7:41:8 | {} |
| tst-UntrustedDataToExternalAPI.js:41:11:45:1 | {\\n x ... usted\\n} | tst-UntrustedDataToExternalAPI.js:41:7:41:8 | {} |
| tst-UntrustedDataToExternalAPI.js:42:8:42:16 | untrusted | tst-UntrustedDataToExternalAPI.js:41:11:45:1 | {\\n x ... usted\\n} |
| tst-UntrustedDataToExternalAPI.js:42:8:42:16 | untrusted | tst-UntrustedDataToExternalAPI.js:41:11:45:1 | {\\n x ... usted\\n} |
| tst-UntrustedDataToExternalAPI.js:43:8:43:16 | untrusted | tst-UntrustedDataToExternalAPI.js:41:11:45:1 | {\\n x ... usted\\n} |
@@ -101,4 +105,5 @@ edges
| tst-UntrustedDataToExternalAPI.js:30:13:30:30 | getDeepUntrusted() | tst-UntrustedDataToExternalAPI.js:3:17:3:27 | window.name | tst-UntrustedDataToExternalAPI.js:30:13:30:30 | getDeepUntrusted() | Call to external-lib() [param 0] with untrusted data from $@. | tst-UntrustedDataToExternalAPI.js:3:17:3:27 | window.name | window.name |
| tst-UntrustedDataToExternalAPI.js:33:14:33:22 | untrusted | tst-UntrustedDataToExternalAPI.js:3:17:3:27 | window.name | tst-UntrustedDataToExternalAPI.js:33:14:33:22 | untrusted | Call to external-lib.get.[callback].[param 'res'].send() [param 0] with untrusted data from $@. | tst-UntrustedDataToExternalAPI.js:3:17:3:27 | window.name | window.name |
| tst-UntrustedDataToExternalAPI.js:34:34:34:42 | untrusted | tst-UntrustedDataToExternalAPI.js:3:17:3:27 | window.name | tst-UntrustedDataToExternalAPI.js:34:34:34:42 | untrusted | Call to external-lib.get.[callback].[param 'req'].app.locals.something.foo() [param 0] with untrusted data from $@. | tst-UntrustedDataToExternalAPI.js:3:17:3:27 | window.name | window.name |
| tst-UntrustedDataToExternalAPI.js:41:7:41:8 | {} | tst-UntrustedDataToExternalAPI.js:3:17:3:27 | window.name | tst-UntrustedDataToExternalAPI.js:41:7:41:8 | {} | Call to lodash.merge() [param 0] with untrusted data from $@. | tst-UntrustedDataToExternalAPI.js:3:17:3:27 | window.name | window.name |
| tst-UntrustedDataToExternalAPI.js:41:11:45:1 | {\\n x ... usted\\n} | tst-UntrustedDataToExternalAPI.js:3:17:3:27 | window.name | tst-UntrustedDataToExternalAPI.js:41:11:45:1 | {\\n x ... usted\\n} | Call to lodash.merge() [param 1] with untrusted data from $@. | tst-UntrustedDataToExternalAPI.js:3:17:3:27 | window.name | window.name |

View File

@@ -1119,6 +1119,10 @@ nodes
| tst.js:494:18:494:30 | location.hash |
| tst.js:494:18:494:40 | locatio ... bstr(1) |
| tst.js:494:18:494:40 | locatio ... bstr(1) |
| tst.js:501:33:501:63 | decodeU ... n.hash) |
| tst.js:501:33:501:63 | decodeU ... n.hash) |
| tst.js:501:43:501:62 | window.location.hash |
| tst.js:501:43:501:62 | window.location.hash |
| typeahead.js:20:13:20:45 | target |
| typeahead.js:20:22:20:45 | documen ... .search |
| typeahead.js:20:22:20:45 | documen ... .search |
@@ -2271,6 +2275,10 @@ edges
| tst.js:494:18:494:30 | location.hash | tst.js:494:18:494:40 | locatio ... bstr(1) |
| tst.js:494:18:494:30 | location.hash | tst.js:494:18:494:40 | locatio ... bstr(1) |
| tst.js:494:18:494:30 | location.hash | tst.js:494:18:494:40 | locatio ... bstr(1) |
| tst.js:501:43:501:62 | window.location.hash | tst.js:501:33:501:63 | decodeU ... n.hash) |
| tst.js:501:43:501:62 | window.location.hash | tst.js:501:33:501:63 | decodeU ... n.hash) |
| tst.js:501:43:501:62 | window.location.hash | tst.js:501:33:501:63 | decodeU ... n.hash) |
| tst.js:501:43:501:62 | window.location.hash | tst.js:501:33:501:63 | decodeU ... n.hash) |
| typeahead.js:20:13:20:45 | target | typeahead.js:21:12:21:17 | target |
| typeahead.js:20:22:20:45 | documen ... .search | typeahead.js:20:13:20:45 | target |
| typeahead.js:20:22:20:45 | documen ... .search | typeahead.js:20:13:20:45 | target |
@@ -2559,6 +2567,7 @@ edges
| tst.js:486:22:486:24 | url | tst.js:471:13:471:36 | documen ... .search | tst.js:486:22:486:24 | url | Cross-site scripting vulnerability due to $@. | tst.js:471:13:471:36 | documen ... .search | user-provided value |
| tst.js:491:23:491:45 | locatio ... bstr(1) | tst.js:491:23:491:35 | location.hash | tst.js:491:23:491:45 | locatio ... bstr(1) | Cross-site scripting vulnerability due to $@. | tst.js:491:23:491:35 | location.hash | user-provided value |
| tst.js:494:18:494:40 | locatio ... bstr(1) | tst.js:494:18:494:30 | location.hash | tst.js:494:18:494:40 | locatio ... bstr(1) | Cross-site scripting vulnerability due to $@. | tst.js:494:18:494:30 | location.hash | user-provided value |
| tst.js:501:33:501:63 | decodeU ... n.hash) | tst.js:501:43:501:62 | window.location.hash | tst.js:501:33:501:63 | decodeU ... n.hash) | Cross-site scripting vulnerability due to $@. | tst.js:501:43:501:62 | window.location.hash | user-provided value |
| typeahead.js:25:18:25:20 | val | typeahead.js:20:22:20:45 | documen ... .search | typeahead.js:25:18:25:20 | val | Cross-site scripting vulnerability due to $@. | typeahead.js:20:22:20:45 | documen ... .search | user-provided value |
| v-html.vue:2:8:2:23 | v-html=tainted | v-html.vue:6:42:6:58 | document.location | v-html.vue:2:8:2:23 | v-html=tainted | Cross-site scripting vulnerability due to $@. | v-html.vue:6:42:6:58 | document.location | user-provided value |
| various-concat-obfuscations.js:4:4:4:31 | "<div>" ... </div>" | various-concat-obfuscations.js:2:16:2:39 | documen ... .search | various-concat-obfuscations.js:4:4:4:31 | "<div>" ... </div>" | Cross-site scripting vulnerability due to $@. | various-concat-obfuscations.js:2:16:2:39 | documen ... .search | user-provided value |

View File

@@ -1131,6 +1131,10 @@ nodes
| tst.js:494:18:494:30 | location.hash |
| tst.js:494:18:494:40 | locatio ... bstr(1) |
| tst.js:494:18:494:40 | locatio ... bstr(1) |
| tst.js:501:33:501:63 | decodeU ... n.hash) |
| tst.js:501:33:501:63 | decodeU ... n.hash) |
| tst.js:501:43:501:62 | window.location.hash |
| tst.js:501:43:501:62 | window.location.hash |
| typeahead.js:9:28:9:30 | loc |
| typeahead.js:9:28:9:30 | loc |
| typeahead.js:9:28:9:30 | loc |
@@ -2333,6 +2337,10 @@ edges
| tst.js:494:18:494:30 | location.hash | tst.js:494:18:494:40 | locatio ... bstr(1) |
| tst.js:494:18:494:30 | location.hash | tst.js:494:18:494:40 | locatio ... bstr(1) |
| tst.js:494:18:494:30 | location.hash | tst.js:494:18:494:40 | locatio ... bstr(1) |
| tst.js:501:43:501:62 | window.location.hash | tst.js:501:33:501:63 | decodeU ... n.hash) |
| tst.js:501:43:501:62 | window.location.hash | tst.js:501:33:501:63 | decodeU ... n.hash) |
| tst.js:501:43:501:62 | window.location.hash | tst.js:501:33:501:63 | decodeU ... n.hash) |
| tst.js:501:43:501:62 | window.location.hash | tst.js:501:33:501:63 | decodeU ... n.hash) |
| typeahead.js:9:28:9:30 | loc | typeahead.js:10:16:10:18 | loc |
| typeahead.js:9:28:9:30 | loc | typeahead.js:10:16:10:18 | loc |
| typeahead.js:9:28:9:30 | loc | typeahead.js:10:16:10:18 | loc |

View File

@@ -313,7 +313,7 @@ function basicExceptions() {
}
function handlebarsSafeString() {
return new Handlebars.SafeString(location); // NOT OK!
return new Handlebars.SafeString(location); // NOT OK!
}
function test2() {
@@ -355,15 +355,15 @@ function thisNodes() {
var target = document.location.search
this.html(target); // NOT OK. (this is a jQuery object)
this.innerHTML = target // OK. (this is a jQuery object)
this.each(function (i, e) {
this.innerHTML = target; // NOT OK. (this is a DOM-node);
this.html(target); // OK. (this is a DOM-node);
e.innerHTML = target; // NOT OK.
});
}
$.fn[pluginName] = myPlugin;
$.fn[pluginName] = myPlugin;
}
@@ -380,7 +380,7 @@ function test() {
function test() {
var target = document.location.search
$('myId').html(target); // NOT OK
$('myId').html(target.taint); // NOT OK
@@ -401,7 +401,7 @@ function test() {
if (random()) {return;}
$('myId').html(target.taint6); // OK
if (random()) {target.taint7 = "safe";}
$('myId').html(target.taint7); // NOT OK
@@ -493,3 +493,13 @@ function urlStuff() {
const myHistory = require('history').createBrowserHistory();
myHistory.push(location.hash.substr(1)); // NOT OK
}
function Foo() {
this.foo = document;
var obj = {
bar: function() {
this.foo.body.innerHTML = decodeURI(window.location.hash); // NOT OK
}
};
Object.assign(this, obj);
}

View File

@@ -188,8 +188,18 @@
}
// extending options
options = $.extend( {}, options );
var target = $( options.of ); // NOT OK
console.log(target);
};
$.fn.blockReceiver = function( options ) {
$.extend({
foo() {
$(this); // OK
}
},
options,
);
};
});

View File

@@ -103,11 +103,23 @@ app.get('/bar', (req, res) => {
let object = {};
object[taint][taint] = taint; // NOT OK
const bad = ["__proto__", "constructor"];
if (bad.includes(taint)) {
return;
}
object[taint][taint] = taint; // OK
});
});
app.get('/assign', (req, res) => {
let taint = String(req.query.data);
let plainObj = {};
let object = Object.assign({}, plainObj[taint]);
object[taint] = taint; // OK - 'object' is not Object.prototype itself (but possibly a copy)
let dest = {};
Object.assign(dest, plainObj[taint]);
dest[taint] = taint; // OK - 'dest' is not Object.prototype itself (but possibly a copy)
});

View File

@@ -5,6 +5,8 @@
#include <iostream>
#include <optional>
#include <vector>
#include <binlog/binlog.hpp>
#include <binlog/adapt_stdoptional.hpp>
#include "{{trap_library}}/TrapLabel.h"
#include "{{trap_library}}/TrapTagTraits.h"
@@ -80,3 +82,9 @@ struct detail::ToTrapClassFunctor<{{name}}Tag> {
};
{{/classes}}
}
{{#classes}}
{{#final}}
BINLOG_ADAPT_STRUCT(codeql::{{name}}, id{{> cpp_list_fields}});
{{/final}}
{{/classes}}

View File

@@ -0,0 +1 @@
{{#bases}}{{#ref}}{{> cpp_list_fields}}{{/ref}}{{/bases}}{{#fields}}, {{field_name}}{{/fields}}

View File

@@ -4,6 +4,7 @@
#include <iostream>
#include <string>
#include <binlog/binlog.hpp>
#include "{{trap_library_dir}}/TrapLabel.h"
#include "{{trap_library_dir}}/TrapTagTraits.h"
@@ -43,3 +44,7 @@ struct ToBindingTrapFunctor<{{type}}> {
{{/id}}
{{/traps}}
}
{{#traps}}
BINLOG_ADAPT_STRUCT(codeql::{{name}}Trap{{#fields}}, {{field_name}}{{/fields}});
{{/traps}}

View File

@@ -4,6 +4,8 @@
The Swift CodeQL package is an experimental and unsupported work in progress.
##
## Building the Swift extractor
First ensure you have Bazel installed, for example with
@@ -28,7 +30,9 @@ set up the search path
in [the per-user CodeQL configuration file](https://docs.github.com/en/code-security/codeql-cli/using-the-codeql-cli/specifying-command-options-in-a-codeql-configuration-file#using-a-codeql-configuration-file)
.
## Code generation
## Development
### Code generation
Run
@@ -41,7 +45,27 @@ to update generated files. This can be shortened to
You can also run `../misc/codegen/codegen.py`, as long as you are beneath the `swift` directory.
## IDE setup
### Logging configuration
A log file is produced for each run under `CODEQL_EXTRACTOR_SWIFT_LOG_DIR` (the usual DB log directory).
You can use the environment variable `CODEQL_EXTRACTOR_SWIFT_LOG_LEVELS` to configure levels for
loggers and outputs. This must have the form of a comma separated `spec:min_level` list, where
`spec` is either a glob pattern (made up of alphanumeric, `/`, `*` and `.` characters) for
matching logger names or one of `out:bin`, `out:text` or `out:console`, and `min_level` is one
of `trace`, `debug`, `info`, `warning`, `error`, `critical` or `no_logs` to turn logs completely off.
Current output default levels are no binary logs, `info` logs or higher in the text file and `warning` logs or higher on
standard error. By default, all loggers are configured with the lowest logging level of all outputs (`info` by default).
Logger names are visible in the textual logs between `[...]`. Examples are `extractor/dispatcher`
or `extractor/<source filename>.trap`. An example of `CODEQL_EXTRACTOR_SWIFT_LOG_LEVELS` usage is the following:
```bash
export CODEQL_EXTRACTOR_SWIFT_LOG_LEVELS=out:console:trace,out:text:no_logs,*:warning,*.trap:trace
```
This will turn off generation of a text log file, redirecting all logs to standard error, but will make all loggers only
write warnings or above, except for trap emission logs which will output all logs.
### CLion and the native bazel plugin
@@ -84,3 +108,7 @@ In particular for breakpoints to work you might need to setup the following remo
|-------------|--------------------------------------|
| `swift` | `/absolute/path/to/codeql/swift` |
| `bazel-out` | `/absolute/path/to/codeql/bazel-out` |
### Thread safety
The extractor is single-threaded, and there was no effort to make anything in it thread-safe.

View File

@@ -8,6 +8,7 @@ swift_cc_library(
deps = [
"//swift/extractor/config",
"//swift/extractor/infra/file",
"//swift/extractor/infra/log",
"//swift/extractor/trap",
"//swift/third_party/swift-llvm-support",
],

View File

@@ -13,6 +13,7 @@
#include "swift/extractor/infra/SwiftLocationExtractor.h"
#include "swift/extractor/infra/SwiftBodyEmissionStrategy.h"
#include "swift/extractor/config/SwiftExtractorState.h"
#include "swift/extractor/infra/log/SwiftLogging.h"
namespace codeql {
@@ -151,7 +152,9 @@ class SwiftDispatcher {
return *l;
}
waitingForNewLabel = e;
// TODO: add tracing logs for visited stuff, maybe within the translators?
visit(e, std::forward<Args>(args)...);
Log::flush();
// TODO when everything is moved to structured C++ classes, this should be moved to createEntry
if (auto l = store.get(e)) {
if constexpr (IsLocatable<E>) {
@@ -329,6 +332,7 @@ class SwiftDispatcher {
SwiftBodyEmissionStrategy& bodyEmissionStrategy;
Store::Handle waitingForNewLabel{std::monostate{}};
std::unordered_set<swift::ModuleDecl*> encounteredModules;
Logger logger{"dispatcher"};
};
} // namespace codeql

View File

@@ -36,6 +36,8 @@ class TargetFile {
return *this;
}
const std::filesystem::path& target() const { return targetPath; }
private:
TargetFile(const std::filesystem::path& target,
const std::filesystem::path& targetDir,

View File

@@ -0,0 +1,7 @@
# Logging infrastructure for the Swift extractor, built on top of the vendored
# binlog library (all .cpp/.h files in this directory).
cc_library(
    name = "log",
    srcs = glob(["*.cpp"]),
    hdrs = glob(["*.h"]),
    visibility = ["//visibility:public"],
    deps = ["@binlog"],
)

View File

@@ -0,0 +1,163 @@
#include "swift/extractor/infra/log/SwiftLogging.h"
#include <filesystem>
#include <stdlib.h>
#include <optional>
#define LEVEL_REGEX_PATTERN "trace|debug|info|warning|error|critical|no_logs"
BINLOG_ADAPT_ENUM(codeql::Log::Level, trace, debug, info, warning, error, critical, no_logs)
namespace codeql {
namespace {
using LevelRule = std::pair<std::regex, Log::Level>;
using LevelRules = std::vector<LevelRule>;
// Returns the level of the most recently added rule whose regex matches `name`,
// or `dflt` when no rule matches. Later rules take precedence, hence the
// backward scan.
Log::Level getLevelFor(std::string_view name, const LevelRules& rules, Log::Level dflt) {
  for (auto i = rules.size(); i > 0; --i) {
    const auto& [pattern, level] = rules[i - 1];
    if (std::regex_match(name.begin(), name.end(), pattern)) {
      return level;
    }
  }
  return dflt;
}
// Returns the value of environment variable `var`, or `dflt` when it is unset.
const char* getEnvOr(const char* var, const char* dflt) {
  const char* value = getenv(var);
  return value != nullptr ? value : dflt;
}
// Converts a regex sub-match over a char buffer into a non-owning string view.
std::string_view matchToView(std::csub_match m) {
  auto length = static_cast<size_t>(m.length());
  return std::string_view{m.first, length};
}
// Maps a severity name to the corresponding level. Anything unrecognized
// (which the callers' regex restricts to "no_logs") disables logging.
Log::Level stringToLevel(std::string_view v) {
  static constexpr std::pair<std::string_view, Log::Level> names[] = {
      {"trace", Log::Level::trace},   {"debug", Log::Level::debug},
      {"info", Log::Level::info},     {"warning", Log::Level::warning},
      {"error", Log::Level::error},   {"critical", Log::Level::critical},
  };
  for (const auto& [name, level] : names) {
    if (v == name) {
      return level;
    }
  }
  return Log::Level::no_logs;
}
// Parses a regex sub-match into a severity level. Callers only pass sub-matches
// of LEVEL_REGEX_PATTERN, so the text is always a valid level name.
Log::Level matchToLevel(std::csub_match m) {
  return stringToLevel(matchToView(m));
}
} // namespace
// Parses the level-configuration environment variable `envVar` (if set) as a
// comma-separated list of `spec:level` entries, where `spec` is either a glob
// pattern matched against logger names or an `out:<bin|text|console>` output
// selector. Glob rules are compiled to regexes and appended to `sourceRules`;
// output rules set the corresponding output's level directly. Returns
// human-readable problems for entries that did not parse, so the caller can
// log them once logging is actually up.
std::vector<std::string> Log::collectSeverityRulesAndReturnProblems(const char* envVar) {
  std::vector<std::string> problems;
  if (auto levels = getEnvOr(envVar, nullptr)) {
    // expect comma-separated <glob pattern>:<log severity>
    std::regex comma{","};
    std::regex levelAssignment{R"((?:([*./\w]+)|(?:out:(bin|text|console))):()" LEVEL_REGEX_PATTERN
                               ")"};
    // split the variable on commas; -1 selects the text between the matches
    std::cregex_token_iterator begin{levels, levels + strlen(levels), comma, -1};
    std::cregex_token_iterator end{};
    for (auto it = begin; it != end; ++it) {
      std::cmatch match;
      if (std::regex_match(it->first, it->second, match, levelAssignment)) {
        auto level = matchToLevel(match[3]);
        if (match[1].matched) {
          // group 1 matched: the rule targets loggers by glob pattern
          auto pattern = match[1].str();
          // replace all "*" with ".*" and all "." with "\.", turning the glob pattern into a regex
          std::string::size_type pos = 0;
          while ((pos = pattern.find_first_of("*.", pos)) != std::string::npos) {
            pattern.insert(pos, (pattern[pos] == '*') ? "." : "\\");
            pos += 2;  // skip past the two characters just produced
          }
          sourceRules.emplace_back(pattern, level);
        } else {
          // group 2 matched: the rule targets one of the three outputs
          auto out = matchToView(match[2]);
          if (out == "bin") {
            binary.level = level;
          } else if (out == "text") {
            text.level = level;
          } else if (out == "console") {
            console.level = level;
          }
        }
      } else {
        problems.emplace_back("Malformed log level rule: " + it->str());
      }
    }
  }
  return problems;
}
// One-time logging setup, run from the `Log` constructor (i.e. on first use of
// the singleton). Reads rule/level configuration from the environment, opens
// the text and/or binary log files under CODEQL_EXTRACTOR_SWIFT_LOG_DIR, and
// finally reports any problems through the freshly configured logging itself.
void Log::configure() {
  // as we are configuring logging right now, we collect problems and log them at the end
  auto problems = collectSeverityRulesAndReturnProblems("CODEQL_EXTRACTOR_SWIFT_LOG_LEVELS");
  if (text || binary) {
    // both file outputs share the path <log dir>/<logRootName>/<timestamp>, differing
    // only in extension
    std::filesystem::path logFile = getEnvOr("CODEQL_EXTRACTOR_SWIFT_LOG_DIR", ".");
    logFile /= logRootName;
    logFile /= std::to_string(std::chrono::system_clock::now().time_since_epoch().count());
    std::error_code ec;
    std::filesystem::create_directories(logFile.parent_path(), ec);
    if (!ec) {
      if (text) {
        logFile.replace_extension(".log");
        textFile.open(logFile);
        if (!textFile) {
          // disable this output rather than failing the extractor
          problems.emplace_back("Unable to open text log file " + logFile.string());
          text.level = Level::no_logs;
        }
      }
      if (binary) {
        logFile.replace_extension(".blog");
        binary.output.open(logFile, std::fstream::out | std::fstream::binary);
        if (!binary.output) {
          // disable this output rather than failing the extractor
          problems.emplace_back("Unable to open binary log file " + logFile.string());
          binary.level = Level::no_logs;
        }
      }
    } else {
      // without the directory neither file output can work
      problems.emplace_back("Unable to create log directory " + logFile.parent_path().string() +
                            ": " + ec.message());
      binary.level = Level::no_logs;
      text.level = Level::no_logs;
    }
  }
  for (const auto& problem : problems) {
    LOG_ERROR("{}", problem);
  }
  LOG_INFO("Logging configured (binary: {}, text: {}, console: {})", binary.level, text.level,
           console.level);
  flushImpl();
}
// Drains buffered log events from the binlog session into the outputs: the
// session calls back into `Log::write`, which fans out to each enabled output.
void Log::flushImpl() {
  session.consume(*this);
}
// Builds the configuration for a logger named `name`: its fully qualified name
// is `<logRootName>/<name>`, and its level comes from the last matching source
// rule, defaulting to the lowest level of the three outputs.
Log::LoggerConfiguration Log::getLoggerConfigurationImpl(std::string_view name) {
  LoggerConfiguration ret{session, std::string{logRootName}};
  ret.fullyQualifiedName += '/';
  ret.fullyQualifiedName += name;
  // default: let through anything that at least one output would accept
  ret.level = std::min({binary.level, text.level, console.level});
  ret.level = getLevelFor(ret.fullyQualifiedName, sourceRules, ret.level);
  // avoid Logger constructor loop
  if (name != "logging") {
    LOG_DEBUG("Configuring logger {} with level {}", ret.fullyQualifiedName, ret.level);
  }
  return ret;
}
// Sink for `binlog::Session::consume`: forwards a chunk of serialized events to
// every output that is not disabled (each output applies its own level filter).
Log& Log::write(const char* buffer, std::streamsize size) {
  if (console) {
    console.write(buffer, size);
  }
  if (text) {
    text.write(buffer, size);
  }
  if (binary) {
    binary.write(buffer, size);
  }
  return *this;
}
// Logger used by the logging machinery itself, created lazily on first use.
// Its name, "logging", is special-cased in getLoggerConfigurationImpl to avoid
// recursing during construction.
Logger& Log::logger() {
  static Logger ret{getLoggerConfigurationImpl("logging")};
  return ret;
}
} // namespace codeql

View File

@@ -0,0 +1,189 @@
#pragma once
#include <fstream>
#include <iostream>
#include <regex>
#include <vector>
#include <binlog/binlog.hpp>
#include <binlog/TextOutputStream.hpp>
#include <binlog/EventFilter.hpp>
#include <binlog/adapt_stdfilesystem.hpp>
#include <binlog/adapt_stdoptional.hpp>
#include <binlog/adapt_stdvariant.hpp>
// Logging macros. These will call `logger()` to get a Logger instance, picking up any `logger`
// defined in the current scope. Domain-specific loggers can be added or used by either:
// * providing a class field called `logger` (as `Logger::operator()()` returns itself)
// * declaring a local `logger` variable (to be used for one-time execution like code in `main`)
// * declaring a `Logger& logger()` function returning a reference to a static local variable
// * passing a logger around using a `Logger& logger` function parameter
// They are created with a name that appears in the logs and can be used to filter debug levels (see
// `Logger`).
#define LOG_CRITICAL(...) LOG_WITH_LEVEL(codeql::Log::Level::critical, __VA_ARGS__)
#define LOG_ERROR(...) LOG_WITH_LEVEL(codeql::Log::Level::error, __VA_ARGS__)
#define LOG_WARNING(...) LOG_WITH_LEVEL(codeql::Log::Level::warning, __VA_ARGS__)
#define LOG_INFO(...) LOG_WITH_LEVEL(codeql::Log::Level::info, __VA_ARGS__)
#define LOG_DEBUG(...) LOG_WITH_LEVEL(codeql::Log::Level::debug, __VA_ARGS__)
#define LOG_TRACE(...) LOG_WITH_LEVEL(codeql::Log::Level::trace, __VA_ARGS__)
// only do the actual logging if the picked up `Logger` instance is configured to handle the
// provided log level. `LEVEL` must be a compile-time constant. `logger()` is evaluated once
#define LOG_WITH_LEVEL(LEVEL, ...) \
do { \
constexpr codeql::Log::Level _level = LEVEL; \
codeql::Logger& _logger = logger(); \
if (_level >= _logger.level()) { \
BINLOG_CREATE_SOURCE_AND_EVENT(_logger.writer(), _level, /* category */, binlog::clockNow(), \
__VA_ARGS__); \
} \
} while (false)
// avoid calling into binlog's original macros
#undef BINLOG_CRITICAL
#undef BINLOG_CRITICAL_W
#undef BINLOG_CRITICAL_C
#undef BINLOG_CRITICAL_WC
#undef BINLOG_ERROR
#undef BINLOG_ERROR_W
#undef BINLOG_ERROR_C
#undef BINLOG_ERROR_WC
#undef BINLOG_WARNING
#undef BINLOG_WARNING_W
#undef BINLOG_WARNING_C
#undef BINLOG_WARNING_WC
#undef BINLOG_INFO
#undef BINLOG_INFO_W
#undef BINLOG_INFO_C
#undef BINLOG_INFO_WC
#undef BINLOG_DEBUG
#undef BINLOG_DEBUG_W
#undef BINLOG_DEBUG_C
#undef BINLOG_DEBUG_WC
#undef BINLOG_TRACE
#undef BINLOG_TRACE_W
#undef BINLOG_TRACE_C
#undef BINLOG_TRACE_WC
namespace codeql {
// tools should define this to tweak the root name of all loggers
extern const std::string_view logRootName;
// This class is responsible for the global log state (outputs, log level rules, flushing)
// State is stored in the singleton `Log::instance()`.
// Before using logging, `Log::configure("<name>")` should be used (e.g.
// `Log::configure("extractor")`). Then, `Log::flush()` should be regularly called.
// Logging is configured upon first usage. This consists of
// * using environment variable `CODEQL_EXTRACTOR_SWIFT_LOG_DIR` to choose where to dump the log
// file(s). Log files will go to a subdirectory thereof named after `logRootName`
// * using environment variable `CODEQL_EXTRACTOR_SWIFT_LOG_LEVELS` to configure levels for
// loggers and outputs. This must have the form of a comma separated `spec:level` list, where
// `spec` is either a glob pattern (made up of alphanumeric, `/`, `*` and `.` characters) for
// matching logger names or one of `out:bin`, `out:text` or `out:console`.
// Output default levels can be seen in the corresponding initializers below. By default, all
// loggers are configured with the lowest output level
class Log {
 public:
  // severity levels are binlog's, listed in increasing order ending in `no_logs`
  using Level = binlog::Severity;

  // Internal data required to build `Logger` instances
  struct LoggerConfiguration {
    binlog::Session& session;
    std::string fullyQualifiedName;
    Level level;
  };

  // Flush logs to the designated outputs
  static void flush() { instance().flushImpl(); }

  // create `Logger` configuration, used internally by `Logger`'s constructor
  static LoggerConfiguration getLoggerConfiguration(std::string_view name) {
    return instance().getLoggerConfigurationImpl(name);
  }

 private:
  // binlog text output pattern (see the binlog documentation for the placeholders)
  static constexpr const char* format = "%u %S [%n] %m (%G:%L)\n";

  // configuration happens when the singleton is first constructed
  Log() { configure(); }

  static Log& instance() {
    static Log ret;
    return ret;
  }

  // logger for the logging machinery itself (its "logging" name is special-cased
  // in getLoggerConfigurationImpl to avoid construction recursion)
  class Logger& logger();
  void configure();
  void flushImpl();
  LoggerConfiguration getLoggerConfigurationImpl(std::string_view name);

  // make `session.consume(*this)` work, which requires access to `write`
  friend binlog::Session;
  Log& write(const char* buffer, std::streamsize size);

  // Output filtered according to a configured log level
  template <typename Output>
  struct FilteredOutput {
    binlog::Severity level;
    Output output;
    // drop events below the configured level before they reach the output
    binlog::EventFilter filter{
        [this](const binlog::EventSource& src) { return src.severity >= level; }};
    template <typename... Args>
    FilteredOutput(Level level, Args&&... args)
        : level{level}, output{std::forward<Args>(args)...} {}
    FilteredOutput& write(const char* buffer, std::streamsize size) {
      filter.writeAllowed(buffer, size, output);
      return *this;
    }
    // if configured as `no_logs`, the output is effectively disabled
    explicit operator bool() const { return level < Level::no_logs; }
  };

  using LevelRule = std::pair<std::regex, Level>;
  using LevelRules = std::vector<LevelRule>;

  binlog::Session session;
  std::ofstream textFile;
  // default output levels: binary off, text file at info, stderr console at warning
  FilteredOutput<std::ofstream> binary{Level::no_logs};
  FilteredOutput<binlog::TextOutputStream> text{Level::info, textFile, format};
  FilteredOutput<binlog::TextOutputStream> console{Level::warning, std::cerr, format};
  // logger-name rules collected from the environment; later rules win
  LevelRules sourceRules;

  std::vector<std::string> collectSeverityRulesAndReturnProblems(const char* envVar);
};
// This class represent a named domain-specific logger, responsible for pushing logs using the
// underlying `binlog::SessionWriter` class. This has a configured log level, so that logs on this
// `Logger` with a level lower than the configured one are no-ops. The level is configured based
// on rules matching `<logRootName>/<name>` in `CODEQL_EXTRACTOR_SWIFT_LOG_LEVELS` (see above).
// `<name>` is provided in the constructor. If no rule matches the name, the log level defaults to
// the minimum level of all outputs.
class Logger {
 public:
  // configured logger based on name, as explained above
  explicit Logger(std::string_view name) : Logger(Log::getLoggerConfiguration(name)) {}

  // used internally, public to be accessible to Log for its own logger
  explicit Logger(Log::LoggerConfiguration&& configuration)
      : w{configuration.session, queueSize, /* id */ 0,
          std::move(configuration.fullyQualifiedName)},
        level_{configuration.level} {}

  // underlying binlog writer, used by the logging macros to emit events
  binlog::SessionWriter& writer() { return w; }

  // minimum severity this logger emits; the macros skip lower-level calls entirely
  Log::Level level() const { return level_; }

  // make defining a `Logger logger` field be equivalent to providing a `Logger& logger()` function
  // in order to be picked up by logging macros
  Logger& operator()() { return *this; }

 private:
  static constexpr size_t queueSize = 1 << 20;  // default taken from binlog
  binlog::SessionWriter w;
  Log::Level level_;
};
} // namespace codeql

View File

@@ -16,9 +16,12 @@
#include "swift/extractor/invocation/SwiftInvocationExtractor.h"
#include "swift/extractor/trap/TrapDomain.h"
#include "swift/extractor/infra/file/Path.h"
#include "swift/extractor/infra/log/SwiftLogging.h"
using namespace std::string_literals;
const std::string_view codeql::logRootName = "extractor";
// must be called before processFrontendOptions modifies output paths
static void lockOutputSwiftModuleTraps(codeql::SwiftExtractorState& state,
const swift::FrontendOptions& options) {
@@ -179,7 +182,28 @@ codeql::SwiftExtractorConfiguration configure(int argc, char** argv) {
return configuration;
}
int main(int argc, char** argv) {
// TODO: use `absl::StrJoin` or `boost::algorithm::join`
// Joins the command-line arguments (excluding argv[0]) into a single
// space-separated string, for logging purposes.
static auto argDump(int argc, char** argv) {
  std::string ret;
  for (auto arg = argv + 1; arg < argv + argc; ++arg) {
    ret += *arg;
    ret += ' ';
  }
  // drop the trailing separator; guard the empty case (argc <= 1), where
  // pop_back() on an empty string would be undefined behavior
  if (!ret.empty()) {
    ret.pop_back();
  }
  return ret;
}
// TODO: use `absl::StrJoin` or `boost::algorithm::join`
// Renders the process environment (a null-terminated array of "KEY=value"
// strings) with one entry per line, each followed by a newline.
static auto envDump(char** envp) {
  std::string ret;
  for (; *envp != nullptr; ++envp) {
    ret.append(*envp);
    ret.push_back('\n');
  }
  return ret;
}
int main(int argc, char** argv, char** envp) {
checkWhetherToRunUnderTool(argc, argv);
if (argc == 1) {
@@ -193,6 +217,11 @@ int main(int argc, char** argv) {
initializeSwiftModules();
const auto configuration = configure(argc, argv);
{
codeql::Logger logger{"main"};
LOG_INFO("calling extractor with arguments \"{}\"", argDump(argc, argv));
LOG_DEBUG("environment:\n{}\n", envDump(envp));
}
auto openInterception = codeql::setupFileInterception(configuration);
@@ -204,5 +233,7 @@ int main(int argc, char** argv) {
observer.markSuccessfullyExtractedFiles();
}
codeql::Log::flush();
return frontend_rc;
}

View File

@@ -49,5 +49,6 @@ swift_cc_library(
visibility = ["//visibility:public"],
deps = [
"//swift/extractor/infra/file",
"//swift/extractor/infra/log",
],
)

View File

@@ -5,18 +5,23 @@
#include "swift/extractor/trap/TrapLabel.h"
#include "swift/extractor/infra/file/TargetFile.h"
#include "swift/extractor/infra/log/SwiftLogging.h"
namespace codeql {
// Abstracts a given trap output file, with its own universe of trap labels
class TrapDomain {
TargetFile out;
Logger logger{getLoggerName()};
public:
explicit TrapDomain(TargetFile&& out) : out{std::move(out)} {}
explicit TrapDomain(TargetFile&& out) : out{std::move(out)} {
LOG_DEBUG("writing trap file with target {}", this->out.target());
}
template <typename Entry>
void emit(const Entry& e) {
LOG_TRACE("{}", e);
out << e << '\n';
}
@@ -48,6 +53,7 @@ class TrapDomain {
Args&&... args) {
auto ret = allocateLabel<Tag>();
assignKey(ret, std::forward<Args>(args)...);
LOG_TRACE("^^^ .implementation {}", implementationId);
out << " .implementation " << trapQuoted(implementationId) << '\n';
return ret;
}
@@ -62,6 +68,7 @@ class TrapDomain {
template <typename Tag>
void assignStar(TrapLabel<Tag> label) {
LOG_TRACE("{}=*", label);
out << label << "=*";
}
@@ -69,6 +76,7 @@ class TrapDomain {
void assignKey(TrapLabel<Tag> label, const std::string& key) {
// prefix the key with the id to guarantee the same key is not used wrongly with different tags
auto prefixed = std::string(Tag::prefix) + '_' + key;
LOG_TRACE("{}=@{}", label, prefixed);
out << label << "=@" << trapQuoted(prefixed);
}
@@ -78,6 +86,17 @@ class TrapDomain {
(oss << ... << keyParts);
assignKey(label, oss.str());
}
// Derives this trap domain's logger name from its target file path, so that
// per-file trap log levels can be configured via glob rules.
std::string getLoggerName() {
  // packaged swift modules are typically structured as
  // `Module.swiftmodule/<arch_triple>.swiftmodule`, so the parent is more informative
  // We use `Module.swiftmodule/.trap` then
  if (auto parent = out.target().parent_path(); parent.extension() == ".swiftmodule") {
    return parent.filename() / ".trap";
  } else {
    return out.target().filename();
  }
}
};
} // namespace codeql

View File

@@ -5,6 +5,9 @@
#include <iostream>
#include <string>
#include <vector>
#include <binlog/binlog.hpp>
#include <cmath>
#include <charconv>
namespace codeql {
@@ -18,6 +21,7 @@ class UntypedTrapLabel {
friend class std::hash<UntypedTrapLabel>;
template <typename Tag>
friend class TrapLabel;
BINLOG_ADAPT_STRUCT_FRIEND;
static constexpr uint64_t undefined = 0xffffffffffffffff;
@@ -38,7 +42,22 @@ class UntypedTrapLabel {
return out;
}
std::string str() const {
std::string ret(strSize(), '\0');
ret[0] = '#';
std::to_chars(ret.data() + 1, ret.data() + ret.size(), id_, 16);
return ret;
}
friend bool operator==(UntypedTrapLabel lhs, UntypedTrapLabel rhs) { return lhs.id_ == rhs.id_; }
private:
size_t strSize() const {
if (id_ == undefined) return 17; // #ffffffffffffffff
if (id_ == 0) return 2; // #0
// TODO: use absl::bit_width or C+20 std::bit_width instead of this ugly formula
return /* # */ 1 + /* hex digits */ static_cast<size_t>(ceil(log2(id_ + 1) / 4));
}
};
template <typename TagParam>
@@ -100,3 +119,33 @@ struct hash<codeql::UntypedTrapLabel> {
}
};
} // namespace std
namespace mserialize {
// log labels using their string representation, using binlog/mserialize internal plumbing
template <>
struct CustomTag<codeql::UntypedTrapLabel, void> : detail::BuiltinTag<std::string> {
using T = codeql::UntypedTrapLabel;
};
template <typename Tag>
struct CustomTag<codeql::TrapLabel<Tag>, void> : detail::BuiltinTag<std::string> {
using T = codeql::TrapLabel<Tag>;
};
template <>
struct CustomSerializer<codeql::UntypedTrapLabel, void> {
template <typename OutputStream>
static void serialize(codeql::UntypedTrapLabel label, OutputStream& out) {
mserialize::serialize(label.str(), out);
}
static size_t serialized_size(codeql::UntypedTrapLabel label) {
return sizeof(std::uint32_t) + label.strSize();
}
};
template <typename Tag>
struct CustomSerializer<codeql::TrapLabel<Tag>, void> : CustomSerializer<codeql::UntypedTrapLabel> {
};
} // namespace mserialize

0
swift/third_party/binlog/BUILD.bazel vendored Normal file
View File

View File

@@ -0,0 +1,19 @@
# Vendored build of the binlog logging library (header-heavy, with its inline
# sources compiled in).
cc_library(
    name = "binlog",
    hdrs = glob(["include/**/*.hpp"]),
    srcs = glob(["include/**/*.cpp"]),
    includes = ["include"],
    visibility = ["//visibility:public"],
)

# command-line tool shipped with binlog (see the upstream project for usage)
cc_binary(
    name = "bread",
    srcs = ["bin/bread.cpp", "bin/printers.hpp", "bin/printers.cpp", "bin/getopt.hpp"],
    deps = [":binlog"],
)

# command-line tool shipped with binlog (see the upstream project for usage)
cc_binary(
    name = "brecovery",
    srcs = ["bin/brecovery.cpp", "bin/getopt.hpp"],
    deps = [":binlog"],
)

View File

@@ -12,16 +12,29 @@ _swift_arch_map = {
"macOS-X64": "darwin_x86_64",
}
def _get_label(repository_name, package, target):
return "@%s//swift/third_party/%s:%s" % (repository_name, package, target)
def _get_label(workspace_name, package, target):
return "@%s//swift/third_party/%s:%s" % (workspace_name, package, target)
def _get_build(repository_name, package):
return _get_label(repository_name, package, "BUILD.%s.bazel" % package)
def _get_build(workspace_name, package):
return _get_label(workspace_name, package, "BUILD.%s.bazel" % package)
def _get_patch(repository_name, package, patch):
return _get_label(repository_name, package, "patches/%s.patch" % patch)
def _get_patch(workspace_name, package, patch):
return _get_label(workspace_name, package, "patches/%s.patch" % patch)
def load_dependencies(repository_name):
def _github_archive(*, name, workspace_name, repository, commit, sha256 = None, patches = None):
    """Declares an `http_archive` fetching `repository` ("<org>/<repo>") at `commit` from GitHub.

    The build file is resolved to `swift/third_party/<name>/BUILD.<name>.bazel` in
    `workspace_name`, and each entry of `patches` to
    `swift/third_party/<name>/patches/<patch>.patch`, applied with `-p1`.
    """
    github_name = repository[repository.index("/") + 1:]  # strip the "<org>/" prefix
    patches = [_get_patch(workspace_name, name, p) for p in patches or []]
    http_archive(
        name = name,
        url = "https://github.com/%s/archive/%s.zip" % (repository, commit),
        strip_prefix = "%s-%s" % (github_name, commit),
        build_file = _get_build(workspace_name, name),
        sha256 = sha256,
        patch_args = ["-p1"],
        patches = patches,
    )
def load_dependencies(workspace_name):
for repo_arch, arch in _swift_arch_map.items():
sha256 = _swift_sha_map[repo_arch]
@@ -31,16 +44,24 @@ def load_dependencies(repository_name):
_swift_prebuilt_version,
repo_arch,
),
build_file = _get_build(repository_name, "swift-llvm-support"),
build_file = _get_build(workspace_name, "swift-llvm-support"),
sha256 = sha256,
patch_args = ["-p1"],
patches = [],
)
http_archive(
_github_archive(
name = "picosha2",
url = "https://github.com/okdshin/PicoSHA2/archive/27fcf6979298949e8a462e16d09a0351c18fcaf2.zip",
strip_prefix = "PicoSHA2-27fcf6979298949e8a462e16d09a0351c18fcaf2",
build_file = _get_build(repository_name, "picosha2"),
workspace_name = workspace_name,
repository = "okdshin/PicoSHA2",
commit = "27fcf6979298949e8a462e16d09a0351c18fcaf2",
sha256 = "d6647ca45a8b7bdaf027ecb68d041b22a899a0218b7206dee755c558a2725abb",
)
_github_archive(
name = "binlog",
workspace_name = workspace_name,
repository = "morganstanley/binlog",
commit = "3fef8846f5ef98e64211e7982c2ead67e0b185a6",
sha256 = "f5c61d90a6eff341bf91771f2f465be391fd85397023e1b391c17214f9cbd045",
)