Merge branch 'main' into mathiasvp/array-field-flow

This commit is contained in:
Mathias Vorreiter Pedersen
2020-09-14 20:45:06 +02:00
149 changed files with 5951 additions and 675 deletions

View File

@@ -18,6 +18,7 @@ from Include i, File f, string extension
where
f = i.getIncludedFile() and
extension = f.getExtension().toLowerCase() and
extension != "inc" and
extension != "inl" and
extension != "tcc" and
extension != "tpp" and

View File

@@ -144,8 +144,23 @@ OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) {
*/
predicate jumpStep(Node n1, Node n2) { none() }
/**
* Gets a field corresponding to the bit range `[startBit..endBit)` of class `c`, if any.
*
* A result is either a field declared directly by `c`, or a field found
* recursively inside a class-typed field of `c`. In the recursive case the
* requested range is shifted by the bit offset of the containing field before
* it is looked up in the inner class.
*/
private Field getAField(Class c, int startBit, int endBit) {
// Base case: `result` is declared by `c` itself.
result.getDeclaringType() = c and
startBit = 8 * result.getByteOffset() and
// NOTE(review): assumes `getSize()` is measured in bytes - confirm.
endBit = 8 * result.getType().getSize() + startBit
or
// Recursive case: look inside a field `f` of `c` whose underlying type is a
// class `cInner`, subtracting `f`'s bit offset from both ends of the range.
exists(Field f, Class cInner |
f = c.getAField() and
cInner = f.getUnderlyingType() and
result = getAField(cInner, startBit - 8 * f.getByteOffset(), endBit - 8 * f.getByteOffset())
)
}
private newtype TContent =
TFieldContent(Field f) or
TFieldContent(Class c, int startBit, int endBit) { exists(getAField(c, startBit, endBit)) } or
TCollectionContent() or
TArrayContent()
@@ -163,17 +178,18 @@ class Content extends TContent {
}
private class FieldContent extends Content, TFieldContent {
Field f;
Class c;
int startBit;
int endBit;
FieldContent() { this = TFieldContent(f) }
FieldContent() { this = TFieldContent(c, startBit, endBit) }
Field getField() { result = f }
// Ensure that there's just 1 result for `toString`.
override string toString() { result = min(Field f | f = getAField() | f.toString()) }
override string toString() { result = f.toString() }
predicate hasOffset(Class cl, int start, int end) { cl = c and start = startBit and end = endBit }
override predicate hasLocationInfo(string path, int sl, int sc, int el, int ec) {
f.getLocation().hasLocationInfo(path, sl, sc, el, ec)
}
Field getAField() { result = getAField(c, startBit, endBit) }
}
private class CollectionContent extends Content, TCollectionContent {
@@ -187,25 +203,43 @@ private class ArrayContent extends Content, TArrayContent {
}
private predicate fieldStoreStepNoChi(Node node1, FieldContent f, PostUpdateNode node2) {
exists(FieldAddressInstruction fa, StoreInstruction store |
exists(StoreInstruction store, Class c |
store = node2.asInstruction() and
store.getDestinationAddress() = fa and
store.getSourceValue() = node1.asInstruction() and
f.getField() = fa.getField()
getWrittenField(store, f.(FieldContent).getAField(), c) and
f.hasOffset(c, _, _)
)
}
/**
 * Holds if the destination address of `store` is a `FieldAddressInstruction`
 * for field `f`, and `c` is the type that declares `f`.
 */
pragma[noinline]
private predicate getWrittenField(StoreInstruction store, Field f, Class c) {
  f = store.getDestinationAddress().(FieldAddressInstruction).getField() and
  c = f.getDeclaringType()
}
private predicate fieldStoreStepChi(Node node1, FieldContent f, PostUpdateNode node2) {
exists(FieldAddressInstruction fa, StoreInstruction store |
exists(StoreInstruction store, ChiInstruction chi |
node1.asInstruction() = store and
store.getDestinationAddress() = fa and
node2.asInstruction().(ChiInstruction).getPartial() = store and
f.getField() = fa.getField()
node2.asInstruction() = chi and
chi.getPartial() = store and
exists(Class c |
c = chi.getResultType() and
exists(int startBit, int endBit |
chi.getUpdatedInterval(startBit, endBit) and
f.hasOffset(c, startBit, endBit)
)
or
getWrittenField(store, f.getAField(), c) and
f.hasOffset(c, _, _)
)
)
}
private predicate arrayStoreStepChi(Node node1, Content a, PostUpdateNode node2) {
a instanceof ArrayContent and
private predicate arrayStoreStepChi(Node node1, ArrayContent a, PostUpdateNode node2) {
a = TArrayContent() and
exists(StoreInstruction store |
node1.asInstruction() = store and
(
@@ -230,12 +264,37 @@ predicate storeStep(Node node1, Content f, PostUpdateNode node2) {
arrayStoreStepChi(node1, f, node2)
}
private predicate fieldReadStep(Node node1, FieldContent f, Node node2) {
exists(FieldAddressInstruction fa, LoadInstruction load |
load.getSourceAddress() = fa and
/**
* Gets an integer equal to `i`.
*
* The equality is written as a pair of inequalities rather than `result = i`,
* and the `bindingset` annotation declares that both `result` and `i` are
* bound at the use site.
* NOTE(review): presumably this is meant to keep the optimizer from using the
* equality to propagate bindings between the two values - confirm.
*/
bindingset[result, i]
private int unbindInt(int i) { i <= result and i >= result }
/**
 * Holds if the source address of `load` is a `FieldAddressInstruction`
 * for field `f`, and `c` is the type that declares `f`.
 */
pragma[noinline]
private predicate getLoadedField(LoadInstruction load, Field f, Class c) {
  f = load.getSourceAddress().(FieldAddressInstruction).getField() and
  c = f.getDeclaringType()
}
/**
* Holds if data can flow from `node1` to `node2` via a read of `f`.
* Thus, `node1` references an object with a field `f` whose value ends up in
* `node2`.
*/
predicate fieldReadStep(Node node1, FieldContent f, Node node2) {
exists(LoadInstruction load |
node2.asInstruction() = load and
node1.asInstruction() = load.getSourceValueOperand().getAnyDef() and
fa.getField() = f.(FieldContent).getField() and
load = node2.asInstruction()
exists(Class c |
c = load.getSourceValueOperand().getAnyDef().getResultType() and
exists(int startBit, int endBit |
load.getSourceValueOperand().getUsedInterval(unbindInt(startBit), unbindInt(endBit)) and
f.hasOffset(c, startBit, endBit)
)
or
getLoadedField(load, f.getAField(), c) and
f.hasOffset(c, _, _)
)
)
}

View File

@@ -335,12 +335,14 @@ abstract private class PartialDefinitionNode extends PostUpdateNode {
private class ExplicitFieldStoreQualifierNode extends PartialDefinitionNode {
override ChiInstruction instr;
FieldAddressInstruction field;
StoreInstruction store;
ExplicitFieldStoreQualifierNode() {
not instr.isResultConflated() and
exists(StoreInstruction store |
instr.getPartial() = store and field = store.getDestinationAddress()
instr.getPartial() = store and
(
instr.getUpdatedInterval(_, _) or
store.getDestinationAddress() instanceof FieldAddressInstruction
)
}
@@ -351,7 +353,12 @@ private class ExplicitFieldStoreQualifierNode extends PartialDefinitionNode {
override Node getPreUpdateNode() { result.asOperand() = instr.getTotalOperand() }
override Expr getDefinedExpr() {
result = field.getObjectAddress().getUnconvertedResultExpression()
result =
store
.getDestinationAddress()
.(FieldAddressInstruction)
.getObjectAddress()
.getUnconvertedResultExpression()
}
}
@@ -363,17 +370,22 @@ private class ExplicitFieldStoreQualifierNode extends PartialDefinitionNode {
*/
private class ExplicitSingleFieldStoreQualifierNode extends PartialDefinitionNode {
override StoreInstruction instr;
FieldAddressInstruction field;
ExplicitSingleFieldStoreQualifierNode() {
field = instr.getDestinationAddress() and
not exists(ChiInstruction chi | chi.getPartial() = instr)
not exists(ChiInstruction chi | chi.getPartial() = instr) and
// Without this condition any store would create a `PostUpdateNode`.
instr.getDestinationAddress() instanceof FieldAddressInstruction
}
override Node getPreUpdateNode() { none() }
override Expr getDefinedExpr() {
result = field.getObjectAddress().getUnconvertedResultExpression()
result =
instr
.getDestinationAddress()
.(FieldAddressInstruction)
.getObjectAddress()
.getUnconvertedResultExpression()
}
}

View File

@@ -1962,6 +1962,13 @@ class ChiInstruction extends Instruction {
* Gets the operand that represents the new value written by the memory write.
*/
final Instruction getPartial() { result = getPartialOperand().getDef() }
/**
* Holds if the partial operand of this `ChiInstruction` updates the bit range
* `[startBit, endBit)`, relative to the start address of the total operand.
*
* This is a thin wrapper around `Construction::getIntervalUpdatedByChi`.
*/
final predicate getUpdatedInterval(int startBit, int endBit) {
Construction::getIntervalUpdatedByChi(this, startBit, endBit)
}
}
/**

View File

@@ -328,6 +328,14 @@ class NonPhiMemoryOperand extends NonPhiOperand, MemoryOperand, NonPhiMemoryOper
not Construction::isInCycle(useInstr) and
strictcount(Construction::getMemoryOperandDefinition(useInstr, tag, _)) = 1
}
/**
* Holds if the operand totally overlaps with its definition and consumes the
* bit range `[startBitOffset, endBitOffset)` relative to the start address of the definition.
*
* This is a thin wrapper around `Construction::getUsedInterval`.
*/
predicate getUsedInterval(int startBitOffset, int endBitOffset) {
Construction::getUsedInterval(this, startBitOffset, endBitOffset)
}
}
/**

View File

@@ -617,3 +617,9 @@ MemoryLocation getOperandMemoryLocation(MemoryOperand operand) {
)
)
}
/**
* Gets the start bit offset of `location`, if any.
*
* Only a `VariableMemoryLocation` is accepted, so other kinds of memory
* location have no result.
*/
int getStartBitOffset(VariableMemoryLocation location) { result = location.getStartBitOffset() }
/**
* Gets the end bit offset of `location`, if any.
*
* Only a `VariableMemoryLocation` is accepted, so other kinds of memory
* location have no result.
*/
int getEndBitOffset(VariableMemoryLocation location) { result = location.getEndBitOffset() }

View File

@@ -149,6 +149,34 @@ private module Cached {
)
}
/**
* Holds if the partial operand of `chi` updates the bit range
* `[startBitOffset, endBitOffset)` of the total operand.
*
* The range is read from the start and end bit offsets of the result memory
* location of the `OldInstruction` that `getOldInstruction` returns for
* `chi`'s partial definition. The predicate has no results when that location
* has no bit offsets.
*/
cached
predicate getIntervalUpdatedByChi(ChiInstruction chi, int startBitOffset, int endBitOffset) {
exists(Alias::MemoryLocation location, OldInstruction oldInstruction |
oldInstruction = getOldInstruction(chi.getPartial()) and
location = Alias::getResultMemoryLocation(oldInstruction) and
startBitOffset = Alias::getStartBitOffset(location) and
endBitOffset = Alias::getEndBitOffset(location)
)
}
/**
* Holds if `operand` totally overlaps with its definition and consumes the bit range
* `[startBitOffset, endBitOffset)`.
*
* The range is read from the start and end bit offsets of the memory location
* of a non-phi memory operand of the instruction that uses `operand`.
*
* NOTE(review): `getAnOperand()` is not tied to `operand` itself, so any
* non-phi memory operand of the use instruction can contribute a range -
* confirm this is intended. The "totally overlaps" condition is also not
* checked in this body; presumably it follows from the memory location having
* bit offsets - confirm.
*/
cached
predicate getUsedInterval(NonPhiMemoryOperand operand, int startBitOffset, int endBitOffset) {
exists(Alias::MemoryLocation location, OldIR::NonPhiMemoryOperand oldOperand |
oldOperand = operand.getUse().(OldInstruction).getAnOperand() and
location = Alias::getOperandMemoryLocation(oldOperand) and
startBitOffset = Alias::getStartBitOffset(location) and
endBitOffset = Alias::getEndBitOffset(location)
)
}
/**
* Holds if `instr` is part of a cycle in the operand graph that doesn't go
* through a phi instruction and therefore should be impossible.

View File

@@ -1962,6 +1962,13 @@ class ChiInstruction extends Instruction {
* Gets the operand that represents the new value written by the memory write.
*/
final Instruction getPartial() { result = getPartialOperand().getDef() }
/**
* Holds if the partial operand of this `ChiInstruction` updates the bit range
* `[startBit, endBit)`, relative to the start address of the total operand.
*
* This is a thin wrapper around `Construction::getIntervalUpdatedByChi`.
*/
final predicate getUpdatedInterval(int startBit, int endBit) {
Construction::getIntervalUpdatedByChi(this, startBit, endBit)
}
}
/**

View File

@@ -328,6 +328,14 @@ class NonPhiMemoryOperand extends NonPhiOperand, MemoryOperand, NonPhiMemoryOper
not Construction::isInCycle(useInstr) and
strictcount(Construction::getMemoryOperandDefinition(useInstr, tag, _)) = 1
}
/**
* Holds if the operand totally overlaps with its definition and consumes the
* bit range `[startBitOffset, endBitOffset)` relative to the start address of the definition.
*
* This is a thin wrapper around `Construction::getUsedInterval`.
*/
predicate getUsedInterval(int startBitOffset, int endBitOffset) {
Construction::getUsedInterval(this, startBitOffset, endBitOffset)
}
}
/**

View File

@@ -182,6 +182,18 @@ Instruction getMemoryOperandDefinition(
none()
}
/**
* Holds if the partial operand of `chi` updates the bit range
* `[startBit, endBit)` of the total operand.
*
* This implementation is `none()`, so it never holds.
*/
predicate getIntervalUpdatedByChi(ChiInstruction chi, int startBit, int endBit) { none() }
/**
* Holds if `operand` totally overlaps with its definition and consumes the
* bit range `[startBit, endBit)`.
*
* This implementation is `none()`, so it never holds.
*/
predicate getUsedInterval(Operand operand, int startBit, int endBit) { none() }
/** Gets a non-phi instruction that defines an operand of `instr`. */
private Instruction getNonPhiOperandDef(Instruction instr) {
result = getRegisterOperandDefinition(instr, _)

View File

@@ -1962,6 +1962,13 @@ class ChiInstruction extends Instruction {
* Gets the operand that represents the new value written by the memory write.
*/
final Instruction getPartial() { result = getPartialOperand().getDef() }
/**
* Holds if the partial operand of this `ChiInstruction` updates the bit range
* `[startBit, endBit)`, relative to the start address of the total operand.
*
* This is a thin wrapper around `Construction::getIntervalUpdatedByChi`.
*/
final predicate getUpdatedInterval(int startBit, int endBit) {
Construction::getIntervalUpdatedByChi(this, startBit, endBit)
}
}
/**

View File

@@ -328,6 +328,14 @@ class NonPhiMemoryOperand extends NonPhiOperand, MemoryOperand, NonPhiMemoryOper
not Construction::isInCycle(useInstr) and
strictcount(Construction::getMemoryOperandDefinition(useInstr, tag, _)) = 1
}
/**
* Holds if the operand totally overlaps with its definition and consumes the
* bit range `[startBitOffset, endBitOffset)` relative to the start address of the definition.
*
* This is a thin wrapper around `Construction::getUsedInterval`.
*/
predicate getUsedInterval(int startBitOffset, int endBitOffset) {
Construction::getUsedInterval(this, startBitOffset, endBitOffset)
}
}
/**

View File

@@ -149,6 +149,34 @@ private module Cached {
)
}
/**
* Holds if the partial operand of `chi` updates the bit range
* `[startBitOffset, endBitOffset)` of the total operand.
*
* The range is read from the start and end bit offsets of the result memory
* location of the `OldInstruction` that `getOldInstruction` returns for
* `chi`'s partial definition. The predicate has no results when that location
* has no bit offsets.
*/
cached
predicate getIntervalUpdatedByChi(ChiInstruction chi, int startBitOffset, int endBitOffset) {
exists(Alias::MemoryLocation location, OldInstruction oldInstruction |
oldInstruction = getOldInstruction(chi.getPartial()) and
location = Alias::getResultMemoryLocation(oldInstruction) and
startBitOffset = Alias::getStartBitOffset(location) and
endBitOffset = Alias::getEndBitOffset(location)
)
}
/**
* Holds if `operand` totally overlaps with its definition and consumes the bit range
* `[startBitOffset, endBitOffset)`.
*
* The range is read from the start and end bit offsets of the memory location
* of a non-phi memory operand of the instruction that uses `operand`.
*
* NOTE(review): `getAnOperand()` is not tied to `operand` itself, so any
* non-phi memory operand of the use instruction can contribute a range -
* confirm this is intended. The "totally overlaps" condition is also not
* checked in this body; presumably it follows from the memory location having
* bit offsets - confirm.
*/
cached
predicate getUsedInterval(NonPhiMemoryOperand operand, int startBitOffset, int endBitOffset) {
exists(Alias::MemoryLocation location, OldIR::NonPhiMemoryOperand oldOperand |
oldOperand = operand.getUse().(OldInstruction).getAnOperand() and
location = Alias::getOperandMemoryLocation(oldOperand) and
startBitOffset = Alias::getStartBitOffset(location) and
endBitOffset = Alias::getEndBitOffset(location)
)
}
/**
* Holds if `instr` is part of a cycle in the operand graph that doesn't go
* through a phi instruction and therefore should be impossible.

View File

@@ -79,3 +79,9 @@ MemoryLocation getResultMemoryLocation(Instruction instr) {
MemoryLocation getOperandMemoryLocation(MemoryOperand operand) {
result = getMemoryLocation(getAddressOperandAllocation(operand.getAddressOperand()))
}
/**
* Gets the start bit offset of `location`, if any.
*
* This implementation is `none()`, so it never has a result.
*/
int getStartBitOffset(MemoryLocation location) { none() }
/**
* Gets the end bit offset of `location`, if any.
*
* This implementation is `none()`, so it never has a result.
*/
int getEndBitOffset(MemoryLocation location) { none() }

View File

@@ -570,7 +570,7 @@ private float getTruncatedLowerBounds(Expr expr) {
else (
// Some of the bounds computed by getLowerBoundsImpl might
// overflow, so we replace invalid bounds with exprMinVal.
exists(float newLB | newLB = getLowerBoundsImpl(expr) |
exists(float newLB | newLB = normalizeFloatUp(getLowerBoundsImpl(expr)) |
if exprMinVal(expr) <= newLB and newLB <= exprMaxVal(expr)
then result = newLB
else result = exprMinVal(expr)
@@ -617,7 +617,7 @@ private float getTruncatedUpperBounds(Expr expr) {
// Some of the bounds computed by `getUpperBoundsImpl`
// might overflow, so we replace invalid bounds with
// `exprMaxVal`.
exists(float newUB | newUB = getUpperBoundsImpl(expr) |
exists(float newUB | newUB = normalizeFloatUp(getUpperBoundsImpl(expr)) |
if exprMinVal(expr) <= newUB and newUB <= exprMaxVal(expr)
then result = newUB
else result = exprMaxVal(expr)
@@ -1248,70 +1248,6 @@ private float getDefUpperBoundsImpl(RangeSsaDefinition def, StackVariable v) {
unanalyzableDefBounds(def, v, _, result)
}
/**
* Get the lower bounds for a `RangeSsaDefinition`. Most of the work is
* done by `getDefLowerBoundsImpl`, but this is where widening is applied
* to prevent the analysis from exploding due to a recursive definition.
*/
private float getDefLowerBounds(RangeSsaDefinition def, StackVariable v) {
exists(float newLB, float truncatedLB |
newLB = getDefLowerBoundsImpl(def, v) and
if varMinVal(v) <= newLB and newLB <= varMaxVal(v)
then truncatedLB = newLB
else truncatedLB = varMinVal(v)
|
// Widening: check whether the new lower bound is from a source which
// depends recursively on the current definition.
if isRecursiveDef(def, v)
then
// The new lower bound is from a recursive source, so we round
// down to one of a limited set of values to prevent the
// recursion from exploding.
result =
max(float widenLB |
widenLB = wideningLowerBounds(getVariableRangeType(v)) and
not widenLB > truncatedLB
|
widenLB
)
else result = truncatedLB
)
or
// The definition might overflow positively and wrap. If so, the lower
// bound is `typeLowerBound`.
defMightOverflowPositively(def, v) and result = varMinVal(v)
}
/** See comment for `getDefLowerBounds`, above. */
private float getDefUpperBounds(RangeSsaDefinition def, StackVariable v) {
exists(float newUB, float truncatedUB |
newUB = getDefUpperBoundsImpl(def, v) and
if varMinVal(v) <= newUB and newUB <= varMaxVal(v)
then truncatedUB = newUB
else truncatedUB = varMaxVal(v)
|
// Widening: check whether the new upper bound is from a source which
// depends recursively on the current definition.
if isRecursiveDef(def, v)
then
// The new upper bound is from a recursive source, so we round
// up to one of a fixed set of values to prevent the recursion
// from exploding.
result =
min(float widenUB |
widenUB = wideningUpperBounds(getVariableRangeType(v)) and
not widenUB < truncatedUB
|
widenUB
)
else result = truncatedUB
)
or
// The definition might overflow negatively and wrap. If so, the upper
// bound is `typeUpperBound`.
defMightOverflowNegatively(def, v) and result = varMaxVal(v)
}
/**
* Helper for `getDefLowerBounds` and `getDefUpperBounds`. Find the set of
* unanalyzable definitions (such as function parameters) and make their
@@ -1688,6 +1624,70 @@ module SimpleRangeAnalysisInternal {
float getFullyConvertedUpperBounds(Expr expr) {
result = getTruncatedUpperBounds(expr.getFullyConverted())
}
/**
* Get the lower bounds for a `RangeSsaDefinition`. Most of the work is
* done by `getDefLowerBoundsImpl`, but this is where widening is applied
* to prevent the analysis from exploding due to a recursive definition.
*
* A bound from `getDefLowerBoundsImpl` that lies outside the range
* `[varMinVal(v), varMaxVal(v)]` is replaced by `varMinVal(v)` before
* widening is considered.
*/
float getDefLowerBounds(RangeSsaDefinition def, StackVariable v) {
exists(float newLB, float truncatedLB |
newLB = getDefLowerBoundsImpl(def, v) and
if varMinVal(v) <= newLB and newLB <= varMaxVal(v)
then truncatedLB = newLB
else truncatedLB = varMinVal(v)
|
// Widening: check whether the new lower bound is from a source which
// depends recursively on the current definition.
if isRecursiveDef(def, v)
then
// The new lower bound is from a recursive source, so we round
// down to one of a limited set of values to prevent the
// recursion from exploding.
result =
max(float widenLB |
widenLB = wideningLowerBounds(getVariableRangeType(v)) and
not widenLB > truncatedLB
|
widenLB
)
else result = truncatedLB
)
or
// The definition might overflow positively and wrap. If so, the lower
// bound is `varMinVal(v)`.
defMightOverflowPositively(def, v) and result = varMinVal(v)
}
/**
* Get the upper bounds for a `RangeSsaDefinition`. Most of the work is
* done by `getDefUpperBoundsImpl`, but this is where widening is applied
* to prevent the analysis from exploding due to a recursive definition.
* See also the comment for `getDefLowerBounds`.
*
* A bound from `getDefUpperBoundsImpl` that lies outside the range
* `[varMinVal(v), varMaxVal(v)]` is replaced by `varMaxVal(v)` before
* widening is considered.
*/
float getDefUpperBounds(RangeSsaDefinition def, StackVariable v) {
exists(float newUB, float truncatedUB |
newUB = getDefUpperBoundsImpl(def, v) and
if varMinVal(v) <= newUB and newUB <= varMaxVal(v)
then truncatedUB = newUB
else truncatedUB = varMaxVal(v)
|
// Widening: check whether the new upper bound is from a source which
// depends recursively on the current definition.
if isRecursiveDef(def, v)
then
// The new upper bound is from a recursive source, so we round
// up to one of a fixed set of values to prevent the recursion
// from exploding.
result =
min(float widenUB |
widenUB = wideningUpperBounds(getVariableRangeType(v)) and
not widenUB < truncatedUB
|
widenUB
)
else result = truncatedUB
)
or
// The definition might overflow negatively and wrap. If so, the upper
// bound is `varMaxVal(v)`.
defMightOverflowNegatively(def, v) and result = varMaxVal(v)
}
}
private import SimpleRangeAnalysisInternal

View File

@@ -104,7 +104,7 @@ public:
{
if (C1 *c1 = dynamic_cast<C1 *>(c))
{
sink(c1->a); // $ast $ir
sink(c1->a); // $ast,ir
}
C *cc;
if (C2 *c2 = dynamic_cast<C2 *>(c))

View File

@@ -92,3 +92,12 @@ void nestedAssign() {
w.s.m1 = user_input();
sink(w.s.m1); // $ast,ir
}
void addressOfField() {
S s;
s.m1 = user_input();
S s_copy = s;
int* px = &s_copy.m1;
sink(*px); // $f-:ast $ir
}

View File

@@ -20,6 +20,7 @@
| aliasing.cpp:42:11:42:20 | call to user_input | aliasing.cpp:43:13:43:14 | m1 | IR only |
| aliasing.cpp:79:11:79:20 | call to user_input | aliasing.cpp:80:12:80:13 | m1 | IR only |
| aliasing.cpp:86:10:86:19 | call to user_input | aliasing.cpp:87:12:87:13 | m1 | IR only |
| aliasing.cpp:98:10:98:19 | call to user_input | aliasing.cpp:102:8:102:10 | * ... | IR only |
| by_reference.cpp:84:14:84:23 | call to user_input | by_reference.cpp:111:25:111:25 | a | AST only |
| by_reference.cpp:84:14:84:23 | call to user_input | by_reference.cpp:115:27:115:27 | a | AST only |
| by_reference.cpp:88:13:88:22 | call to user_input | by_reference.cpp:131:25:131:25 | a | AST only |

View File

@@ -60,6 +60,10 @@ edges
| aliasing.cpp:79:11:79:20 | call to user_input | aliasing.cpp:80:12:80:13 | m1 |
| aliasing.cpp:86:10:86:19 | call to user_input | aliasing.cpp:87:12:87:13 | m1 |
| aliasing.cpp:92:12:92:21 | call to user_input | aliasing.cpp:93:12:93:13 | m1 |
| aliasing.cpp:98:3:98:21 | Chi [m1] | aliasing.cpp:100:14:100:14 | Store [m1] |
| aliasing.cpp:98:3:98:21 | Store | aliasing.cpp:98:3:98:21 | Chi [m1] |
| aliasing.cpp:98:10:98:19 | call to user_input | aliasing.cpp:98:3:98:21 | Store |
| aliasing.cpp:100:14:100:14 | Store [m1] | aliasing.cpp:102:8:102:10 | * ... |
| by_reference.cpp:50:3:50:3 | setDirectly output argument [a] | by_reference.cpp:51:8:51:8 | Argument -1 indirection [a] |
| by_reference.cpp:50:17:50:26 | call to user_input | by_reference.cpp:50:3:50:3 | setDirectly output argument [a] |
| by_reference.cpp:51:8:51:8 | Argument -1 indirection [a] | by_reference.cpp:51:10:51:20 | call to getDirectly |
@@ -263,6 +267,11 @@ nodes
| aliasing.cpp:87:12:87:13 | m1 | semmle.label | m1 |
| aliasing.cpp:92:12:92:21 | call to user_input | semmle.label | call to user_input |
| aliasing.cpp:93:12:93:13 | m1 | semmle.label | m1 |
| aliasing.cpp:98:3:98:21 | Chi [m1] | semmle.label | Chi [m1] |
| aliasing.cpp:98:3:98:21 | Store | semmle.label | Store |
| aliasing.cpp:98:10:98:19 | call to user_input | semmle.label | call to user_input |
| aliasing.cpp:100:14:100:14 | Store [m1] | semmle.label | Store [m1] |
| aliasing.cpp:102:8:102:10 | * ... | semmle.label | * ... |
| by_reference.cpp:50:3:50:3 | setDirectly output argument [a] | semmle.label | setDirectly output argument [a] |
| by_reference.cpp:50:17:50:26 | call to user_input | semmle.label | call to user_input |
| by_reference.cpp:51:8:51:8 | Argument -1 indirection [a] | semmle.label | Argument -1 indirection [a] |
@@ -419,6 +428,7 @@ nodes
| aliasing.cpp:80:12:80:13 | m1 | aliasing.cpp:79:11:79:20 | call to user_input | aliasing.cpp:80:12:80:13 | m1 | m1 flows from $@ | aliasing.cpp:79:11:79:20 | call to user_input | call to user_input |
| aliasing.cpp:87:12:87:13 | m1 | aliasing.cpp:86:10:86:19 | call to user_input | aliasing.cpp:87:12:87:13 | m1 | m1 flows from $@ | aliasing.cpp:86:10:86:19 | call to user_input | call to user_input |
| aliasing.cpp:93:12:93:13 | m1 | aliasing.cpp:92:12:92:21 | call to user_input | aliasing.cpp:93:12:93:13 | m1 | m1 flows from $@ | aliasing.cpp:92:12:92:21 | call to user_input | call to user_input |
| aliasing.cpp:102:8:102:10 | * ... | aliasing.cpp:98:10:98:19 | call to user_input | aliasing.cpp:102:8:102:10 | * ... | * ... flows from $@ | aliasing.cpp:98:10:98:19 | call to user_input | call to user_input |
| by_reference.cpp:51:10:51:20 | call to getDirectly | by_reference.cpp:50:17:50:26 | call to user_input | by_reference.cpp:51:10:51:20 | call to getDirectly | call to getDirectly flows from $@ | by_reference.cpp:50:17:50:26 | call to user_input | call to user_input |
| by_reference.cpp:57:10:57:22 | call to getIndirectly | by_reference.cpp:56:19:56:28 | call to user_input | by_reference.cpp:57:10:57:22 | call to getIndirectly | call to getIndirectly flows from $@ | by_reference.cpp:56:19:56:28 | call to user_input | call to user_input |
| by_reference.cpp:63:10:63:28 | call to getThroughNonMember | by_reference.cpp:62:25:62:34 | call to user_input | by_reference.cpp:63:10:63:28 | call to getThroughNonMember | call to getThroughNonMember flows from $@ | by_reference.cpp:62:25:62:34 | call to user_input | call to user_input |

View File

@@ -157,6 +157,7 @@
| aliasing.cpp:86:5:86:6 | m1 | AST only |
| aliasing.cpp:92:3:92:3 | w | AST only |
| aliasing.cpp:92:7:92:8 | m1 | AST only |
| aliasing.cpp:98:5:98:6 | m1 | AST only |
| by_reference.cpp:12:8:12:8 | a | AST only |
| by_reference.cpp:16:11:16:11 | a | AST only |
| by_reference.cpp:20:5:20:8 | this | AST only |

View File

@@ -26,6 +26,7 @@
| aliasing.cpp:79:3:79:3 | s |
| aliasing.cpp:86:3:86:3 | s |
| aliasing.cpp:92:5:92:5 | s |
| aliasing.cpp:98:3:98:3 | s |
| by_reference.cpp:12:5:12:5 | s |
| by_reference.cpp:16:5:16:8 | this |
| by_reference.cpp:84:3:84:7 | inner |

View File

@@ -185,6 +185,8 @@
| aliasing.cpp:92:3:92:3 | w |
| aliasing.cpp:92:5:92:5 | s |
| aliasing.cpp:92:7:92:8 | m1 |
| aliasing.cpp:98:3:98:3 | s |
| aliasing.cpp:98:5:98:6 | m1 |
| by_reference.cpp:12:5:12:5 | s |
| by_reference.cpp:12:8:12:8 | a |
| by_reference.cpp:16:5:16:8 | this |

View File

@@ -258,7 +258,7 @@ void test_lambdas()
c = source();
};
e(t, u, w);
sink(w); // tainted
sink(w); // tainted [NOT DETECTED]
}
// --- taint through return value ---
@@ -468,7 +468,7 @@ void test_swop() {
swop(x, y);
sink(x); // clean [FALSE POSITIVE]
sink(y); // tainted
sink(y); // tainted [NOT DETECTED by IR]
}
// --- getdelim ---

View File

@@ -16,13 +16,8 @@
| arrayassignment.cpp:141:7:141:13 | arrayassignment.cpp:139:10:139:15 | IR only |
| arrayassignment.cpp:145:7:145:13 | arrayassignment.cpp:144:12:144:17 | IR only |
| arrayassignment.cpp:146:7:146:13 | arrayassignment.cpp:144:12:144:17 | IR only |
| copyableclass.cpp:40:8:40:9 | copyableclass.cpp:34:22:34:27 | AST only |
| copyableclass.cpp:41:8:41:9 | copyableclass.cpp:35:24:35:29 | AST only |
| copyableclass.cpp:42:8:42:9 | copyableclass.cpp:34:22:34:27 | AST only |
| copyableclass.cpp:43:8:43:9 | copyableclass.cpp:38:8:38:13 | AST only |
| copyableclass.cpp:65:8:65:9 | copyableclass.cpp:60:40:60:45 | AST only |
| copyableclass.cpp:66:8:66:9 | copyableclass.cpp:63:24:63:29 | AST only |
| copyableclass.cpp:67:11:67:11 | copyableclass.cpp:67:13:67:18 | AST only |
| copyableclass.cpp:67:11:67:21 | copyableclass.cpp:67:13:67:18 | IR only |
| copyableclass_declonly.cpp:40:8:40:9 | copyableclass_declonly.cpp:34:30:34:35 | AST only |
| copyableclass_declonly.cpp:41:8:41:9 | copyableclass_declonly.cpp:35:32:35:37 | AST only |
| copyableclass_declonly.cpp:42:8:42:9 | copyableclass_declonly.cpp:34:30:34:35 | AST only |
@@ -41,13 +36,8 @@
| format.cpp:105:8:105:13 | format.cpp:104:31:104:45 | AST only |
| format.cpp:110:8:110:14 | format.cpp:109:38:109:52 | AST only |
| format.cpp:115:8:115:13 | format.cpp:114:37:114:50 | AST only |
| movableclass.cpp:44:8:44:9 | movableclass.cpp:39:21:39:26 | AST only |
| movableclass.cpp:45:8:45:9 | movableclass.cpp:40:23:40:28 | AST only |
| movableclass.cpp:46:8:46:9 | movableclass.cpp:42:8:42:13 | AST only |
| movableclass.cpp:54:8:54:9 | movableclass.cpp:50:38:50:43 | AST only |
| movableclass.cpp:55:8:55:9 | movableclass.cpp:52:23:52:28 | AST only |
| movableclass.cpp:64:8:64:9 | movableclass.cpp:23:55:23:60 | AST only |
| movableclass.cpp:65:11:65:11 | movableclass.cpp:65:13:65:18 | AST only |
| movableclass.cpp:65:11:65:21 | movableclass.cpp:65:13:65:18 | IR only |
| smart_pointer.cpp:12:10:12:10 | smart_pointer.cpp:11:52:11:57 | AST only |
| smart_pointer.cpp:13:10:13:10 | smart_pointer.cpp:11:52:11:57 | AST only |
| smart_pointer.cpp:24:10:24:10 | smart_pointer.cpp:23:52:23:57 | AST only |
@@ -191,10 +181,6 @@
| stringstream.cpp:143:11:143:11 | stringstream.cpp:143:14:143:21 | IR only |
| stringstream.cpp:143:11:143:22 | stringstream.cpp:143:14:143:19 | IR only |
| stringstream.cpp:143:11:143:22 | stringstream.cpp:143:14:143:21 | IR only |
| structlikeclass.cpp:35:8:35:9 | structlikeclass.cpp:29:22:29:27 | AST only |
| structlikeclass.cpp:36:8:36:9 | structlikeclass.cpp:30:24:30:29 | AST only |
| structlikeclass.cpp:37:8:37:9 | structlikeclass.cpp:29:22:29:27 | AST only |
| structlikeclass.cpp:60:8:60:9 | structlikeclass.cpp:55:40:55:45 | AST only |
| swap1.cpp:78:12:78:16 | swap1.cpp:69:23:69:23 | AST only |
| swap1.cpp:87:13:87:17 | swap1.cpp:82:16:82:21 | AST only |
| swap1.cpp:88:13:88:17 | swap1.cpp:81:27:81:28 | AST only |

View File

@@ -17,9 +17,23 @@
| arrayassignment.cpp:141:7:141:13 | access to array | arrayassignment.cpp:139:10:139:15 | call to source |
| arrayassignment.cpp:145:7:145:13 | access to array | arrayassignment.cpp:144:12:144:17 | call to source |
| arrayassignment.cpp:146:7:146:13 | access to array | arrayassignment.cpp:144:12:144:17 | call to source |
| copyableclass.cpp:40:8:40:9 | s1 | copyableclass.cpp:34:22:34:27 | call to source |
| copyableclass.cpp:41:8:41:9 | s2 | copyableclass.cpp:35:24:35:29 | call to source |
| copyableclass.cpp:42:8:42:9 | s3 | copyableclass.cpp:34:22:34:27 | call to source |
| copyableclass.cpp:43:8:43:9 | s4 | copyableclass.cpp:38:8:38:13 | call to source |
| copyableclass.cpp:65:8:65:9 | s1 | copyableclass.cpp:60:40:60:45 | call to source |
| copyableclass.cpp:66:8:66:9 | s2 | copyableclass.cpp:63:24:63:29 | call to source |
| copyableclass.cpp:67:11:67:21 | (reference dereference) | copyableclass.cpp:67:13:67:18 | call to source |
| format.cpp:157:7:157:22 | (int)... | format.cpp:147:12:147:25 | call to source |
| format.cpp:157:7:157:22 | access to array | format.cpp:147:12:147:25 | call to source |
| format.cpp:158:7:158:27 | ... + ... | format.cpp:148:16:148:30 | call to source |
| movableclass.cpp:44:8:44:9 | s1 | movableclass.cpp:39:21:39:26 | call to source |
| movableclass.cpp:45:8:45:9 | s2 | movableclass.cpp:40:23:40:28 | call to source |
| movableclass.cpp:46:8:46:9 | s3 | movableclass.cpp:42:8:42:13 | call to source |
| movableclass.cpp:54:8:54:9 | s1 | movableclass.cpp:50:38:50:43 | call to source |
| movableclass.cpp:55:8:55:9 | s2 | movableclass.cpp:52:23:52:28 | call to source |
| movableclass.cpp:64:8:64:9 | s2 | movableclass.cpp:23:55:23:60 | call to source |
| movableclass.cpp:65:11:65:21 | (reference dereference) | movableclass.cpp:65:13:65:18 | call to source |
| string.cpp:28:7:28:7 | (const char *)... | string.cpp:24:12:24:17 | call to source |
| string.cpp:28:7:28:7 | a | string.cpp:24:12:24:17 | call to source |
| string.cpp:55:7:55:8 | cs | string.cpp:50:19:50:24 | call to source |
@@ -56,7 +70,11 @@
| stringstream.cpp:143:11:143:22 | (reference dereference) | stringstream.cpp:143:14:143:21 | (const char *)... |
| stringstream.cpp:143:11:143:22 | (reference to) | stringstream.cpp:143:14:143:19 | call to source |
| stringstream.cpp:143:11:143:22 | (reference to) | stringstream.cpp:143:14:143:21 | (const char *)... |
| structlikeclass.cpp:35:8:35:9 | s1 | structlikeclass.cpp:29:22:29:27 | call to source |
| structlikeclass.cpp:36:8:36:9 | s2 | structlikeclass.cpp:30:24:30:29 | call to source |
| structlikeclass.cpp:37:8:37:9 | s3 | structlikeclass.cpp:29:22:29:27 | call to source |
| structlikeclass.cpp:38:8:38:9 | s4 | structlikeclass.cpp:33:8:33:13 | call to source |
| structlikeclass.cpp:60:8:60:9 | s1 | structlikeclass.cpp:55:40:55:45 | call to source |
| structlikeclass.cpp:61:8:61:9 | s2 | structlikeclass.cpp:58:24:58:29 | call to source |
| structlikeclass.cpp:62:8:62:20 | ... = ... | structlikeclass.cpp:62:13:62:18 | call to source |
| swap1.cpp:73:12:73:16 | data1 | swap1.cpp:71:15:71:20 | call to source |

View File

@@ -136,7 +136,7 @@
| test.c:183:14:183:14 | a | -7 |
| test.c:184:5:184:9 | total | -45 |
| test.c:184:14:184:14 | b | -7 |
| test.c:184:16:184:16 | c | -0 |
| test.c:184:16:184:16 | c | 0 |
| test.c:186:13:186:13 | a | -2147483648 |
| test.c:186:18:186:18 | a | -7 |
| test.c:187:14:187:14 | a | -7 |

View File

@@ -115,7 +115,7 @@
| test.c:168:14:168:14 | a | 11 |
| test.c:169:5:169:9 | total | 8 |
| test.c:169:14:169:14 | b | 11 |
| test.c:169:16:169:16 | c | -0 |
| test.c:169:16:169:16 | c | 0 |
| test.c:171:13:171:13 | a | 2147483647 |
| test.c:171:18:171:18 | a | 2147483647 |
| test.c:172:14:172:14 | a | 11 |

View File

@@ -31,7 +31,7 @@
| PointlessComparison.c:126:12:126:18 | ... >= ... | Comparison is always true because a >= 20. |
| PointlessComparison.c:129:12:129:16 | ... > ... | Comparison is always false because a <= 3. |
| PointlessComparison.c:197:7:197:11 | ... < ... | Comparison is always false because x >= 0. |
| PointlessComparison.c:264:12:264:22 | ... >= ... | Comparison is always true because dbl >= 0 and -0 >= - .... |
| PointlessComparison.c:264:12:264:22 | ... >= ... | Comparison is always true because dbl >= 0 and 0 >= - .... |
| PointlessComparison.c:273:9:273:18 | ... > ... | Comparison is always false because c <= 0. |
| PointlessComparison.c:283:13:283:19 | ... >= ... | Comparison is always true because c >= 11. |
| PointlessComparison.c:294:9:294:16 | ... >= ... | Comparison is always false because ui1 <= 0. |

View File

@@ -79,4 +79,40 @@ int containsIfDef(int x) {
#endif
return result >= 0;
}
}
// Test case: a non-negative `val` is negated with unary minus, so `val <= 0`
// holds afterwards. The final `val == 0` check can be either true or false.
void negativeZero1(int val) {
if (val >= 0)
{
val = -val; // yields 0 when val was 0, a negative value otherwise
}
if (val == 0) // GOOD [NO LONGER REPORTED]
;
}
// Test case: a non-negative `val` is negated by subtracting it from zero, so
// `val <= 0` holds afterwards. The final `val == 0` check can be either true
// or false.
void negativeZero2(int val) {
if (val >= 0)
{
val = 0 - val; // yields 0 when val was 0, a negative value otherwise
}
if (val == 0) // GOOD
;
}
// Test case: a non-negative `val` is negated with a compound multiplication
// by -1, so `val <= 0` holds afterwards. The final `val == 0` check can be
// either true or false.
void negativeZero3(int val) {
if (val >= 0)
{
val *= -1; // yields 0 when val was 0, a negative value otherwise
}
if (val == 0) // GOOD [NO LONGER REPORTED]
;
}
// Test case: a non-negative `val` is negated by assigning `val * -1`, so
// `val <= 0` holds afterwards. The final `val == 0` check can be either true
// or false.
void negativeZero4(int val) {
if (val >= 0)
{
val = val * -1; // yields 0 when val was 0, a negative value otherwise
}
if (val == 0) // GOOD [NO LONGER REPORTED]
;
}

View File

@@ -33,16 +33,12 @@ namespace Semmle.BuildAnalyser
/// (Indexing is performed at a later stage by IndexReferences()).
/// </summary>
/// <param name="dir">The directory to index.</param>
/// <returns>The number of DLLs within this directory.</returns>
int AddReferenceDirectory(string dir)
void AddReferenceDirectory(string dir)
{
int count = 0;
foreach (var dll in new DirectoryInfo(dir).EnumerateFiles("*.dll", SearchOption.AllDirectories))
{
dlls.Add(dll.FullName);
++count;
pendingDllsToIndex.Enqueue(dll.FullName);
}
return count;
}
/// <summary>
@@ -52,38 +48,42 @@ namespace Semmle.BuildAnalyser
void IndexReferences()
{
// Read all of the files
foreach (var filename in dlls)
foreach (var filename in pendingDllsToIndex)
{
var info = AssemblyInfo.ReadFromFile(filename);
if (info.Valid)
{
assemblyInfo[filename] = info;
}
else
{
failedDlls.Add(filename);
}
IndexReference(filename);
}
// Index "assemblyInfo" by version string
// The OrderBy is used to ensure that we by default select the highest version number.
foreach (var info in assemblyInfo.Values.OrderBy(info => info.Id))
foreach (var info in assemblyInfoByFileName.Values.OrderBy(info => info.Id))
{
foreach (var index in info.IndexStrings)
references[index] = info;
assemblyInfoById[index] = info;
}
}
/// <summary>
/// Reads the assembly info of a single file and stores it in the
/// file-name map. Files whose info cannot be loaded are recorded in the
/// list of failed file names instead.
/// </summary>
/// <param name="filename">The file to read the assembly info from.</param>
private void IndexReference(string filename)
{
    try
    {
        // The map entry is only written when the read succeeds.
        assemblyInfoByFileName[filename] = AssemblyInfo.ReadFromFile(filename);
    }
    catch (AssemblyLoadException)
    {
        failedAssemblyInfoFileNames.Add(filename);
    }
}
/// <summary>
/// The number of DLLs which are assemblies.
/// </summary>
public int AssemblyCount => assemblyInfo.Count;
public int AssemblyCount => assemblyInfoByFileName.Count;
/// <summary>
/// The number of DLLs which weren't assemblies. (E.g. C++).
/// </summary>
public int NonAssemblyCount => failedDlls.Count;
public int NonAssemblyCount => failedAssemblyInfoFileNames.Count;
/// <summary>
/// Given an assembly id, determine its full info.
@@ -93,70 +93,67 @@ namespace Semmle.BuildAnalyser
public AssemblyInfo ResolveReference(string id)
{
// Fast path if we've already seen this before.
if (failedReferences.Contains(id))
return AssemblyInfo.Invalid;
if (failedAssemblyInfoIds.Contains(id))
throw new AssemblyLoadException();
var query = AssemblyInfo.MakeFromId(id);
id = query.Id; // Sanitise the id.
string assemblyName;
(id, assemblyName) = AssemblyInfo.ComputeSanitizedAssemblyInfo(id);
// Look up the id in our references map.
AssemblyInfo result;
if (references.TryGetValue(id, out result))
if (assemblyInfoById.TryGetValue(id, out AssemblyInfo? result))
{
// The string is in the references map.
return result;
}
else
// Attempt to load the reference from the GAC.
try
{
// Attempt to load the reference from the GAC.
try
{
var loadedAssembly = System.Reflection.Assembly.ReflectionOnlyLoad(id);
var loadedAssembly = System.Reflection.Assembly.ReflectionOnlyLoad(id);
if (loadedAssembly != null)
{
// The assembly was somewhere we haven't indexed before.
// Add this assembly to our index so that subsequent lookups are faster.
if (loadedAssembly != null)
{
// The assembly was somewhere we haven't indexed before.
// Add this assembly to our index so that subsequent lookups are faster.
result = AssemblyInfo.MakeFromAssembly(loadedAssembly);
references[id] = result;
assemblyInfo[loadedAssembly.Location] = result;
return result;
}
}
catch (FileNotFoundException)
{
// A suitable assembly could not be found
}
catch (FileLoadException)
{
// The assembly cannot be loaded for some reason
// e.g. The name is malformed.
}
catch (PlatformNotSupportedException)
{
// .NET Core does not have a GAC.
}
// Fallback position - locate the assembly by its lower-case name only.
var asmName = query.Name.ToLowerInvariant();
if (references.TryGetValue(asmName, out result))
{
references[asmName] = result; // Speed up the next time the same string is resolved
result = AssemblyInfo.MakeFromAssembly(loadedAssembly);
assemblyInfoById[id] = result;
assemblyInfoByFileName[loadedAssembly.Location] = result;
return result;
}
failedReferences.Add(id); // Fail early next time
return AssemblyInfo.Invalid;
}
catch (FileNotFoundException)
{
// A suitable assembly could not be found
}
catch (FileLoadException)
{
// The assembly cannot be loaded for some reason
// e.g. The name is malformed.
}
catch (PlatformNotSupportedException)
{
// .NET Core does not have a GAC.
}
// Fallback position - locate the assembly by its lower-case name only.
var asmName = assemblyName.ToLowerInvariant();
if (assemblyInfoById.TryGetValue(asmName, out result))
{
assemblyInfoById[asmName] = result; // Speed up the next time the same string is resolved
return result;
}
failedAssemblyInfoIds.Add(id); // Fail early next time
throw new AssemblyLoadException();
}
/// <summary>
/// All the assemblies we have indexed.
/// </summary>
public IEnumerable<AssemblyInfo> AllAssemblies => assemblyInfo.Select(a => a.Value);
public IEnumerable<AssemblyInfo> AllAssemblies => assemblyInfoByFileName.Select(a => a.Value);
/// <summary>
/// Retrieve the assembly info of a pre-cached assembly.
@@ -165,32 +162,32 @@ namespace Semmle.BuildAnalyser
/// <returns>The assembly info.</returns>
public AssemblyInfo GetAssemblyInfo(string filepath)
{
if(assemblyInfo.TryGetValue(filepath, out var info))
if (assemblyInfoByFileName.TryGetValue(filepath, out var info))
{
return info;
}
else
IndexReference(filepath);
if (assemblyInfoByFileName.TryGetValue(filepath, out info))
{
info = AssemblyInfo.ReadFromFile(filepath);
assemblyInfo.Add(filepath, info);
return info;
}
throw new AssemblyLoadException();
}
// List of pending DLLs to index.
readonly List<string> dlls = new List<string>();
readonly Queue<string> pendingDllsToIndex = new Queue<string>();
// Map from filename to assembly info.
readonly Dictionary<string, AssemblyInfo> assemblyInfo = new Dictionary<string, AssemblyInfo>();
readonly Dictionary<string, AssemblyInfo> assemblyInfoByFileName = new Dictionary<string, AssemblyInfo>();
// List of DLLs which are not assemblies.
// We probably don't need to keep this
readonly List<string> failedDlls = new List<string>();
readonly List<string> failedAssemblyInfoFileNames = new List<string>();
// Map from assembly id (in various formats) to the full info.
readonly Dictionary<string, AssemblyInfo> references = new Dictionary<string, AssemblyInfo>();
readonly Dictionary<string, AssemblyInfo> assemblyInfoById = new Dictionary<string, AssemblyInfo>();
// Set of failed assembly ids.
readonly HashSet<string> failedReferences = new HashSet<string>();
readonly HashSet<string> failedAssemblyInfoIds = new HashSet<string>();
}
}

View File

@@ -16,35 +16,30 @@ namespace Semmle.BuildAnalyser
/// <summary>
/// The file containing the assembly.
/// </summary>
public string Filename { get; private set; }
/// <summary>
/// Was the information correctly determined?
/// </summary>
public bool Valid { get; private set; }
public string Filename { get; }
/// <summary>
/// The short name of this assembly.
/// </summary>
public string Name { get; private set; }
public string Name { get; }
/// <summary>
/// The version number of this assembly.
/// </summary>
public System.Version Version { get; private set; }
public System.Version? Version { get; }
/// <summary>
/// The public key token of the assembly.
/// </summary>
public string PublicKeyToken { get; private set; }
public string? PublicKeyToken { get; }
/// <summary>
/// The culture.
/// </summary>
public string Culture { get; private set; }
public string? Culture { get; }
/// <summary>
/// Get/parse a canonical ID of this assembly.
/// Gets the canonical ID of this assembly.
/// </summary>
public string Id
{
@@ -59,25 +54,6 @@ namespace Semmle.BuildAnalyser
result = string.Format("{0}, PublicKeyToken={1}", result, PublicKeyToken);
return result;
}
private set
{
var sections = value.Split(new string[] { ", " }, StringSplitOptions.None);
Name = sections.First();
foreach (var section in sections.Skip(1))
{
if (section.StartsWith("Version="))
Version = new Version(section.Substring(8));
else if (section.StartsWith("Culture="))
Culture = section.Substring(8);
else if (section.StartsWith("PublicKeyToken="))
PublicKeyToken = section.Substring(15);
// else: Some other field like processorArchitecture - ignore.
}
}
}
public override string ToString() => Id;
@@ -100,27 +76,58 @@ namespace Semmle.BuildAnalyser
}
}
/// <summary>
/// Get an invalid assembly info (Valid==false).
/// </summary>
public static AssemblyInfo Invalid { get; } = new AssemblyInfo();
private AssemblyInfo(string id, string filename)
{
var sections = id.Split(new string[] { ", " }, StringSplitOptions.None);
private AssemblyInfo() { }
Name = sections.First();
foreach (var section in sections.Skip(1))
{
if (section.StartsWith("Version="))
Version = new Version(section.Substring(8));
else if (section.StartsWith("Culture="))
Culture = section.Substring(8);
else if (section.StartsWith("PublicKeyToken="))
PublicKeyToken = section.Substring(15);
// else: Some other field like processorArchitecture - ignore.
}
Filename = filename;
}
/// <summary>
/// Creates assembly info from individually supplied components.
/// </summary>
/// <param name="filename">The file containing the assembly.</param>
/// <param name="name">The short name of the assembly.</param>
/// <param name="version">The version number of the assembly.</param>
/// <param name="culture">The culture.</param>
/// <param name="publicKeyToken">The public key token of the assembly.</param>
private AssemblyInfo(string filename, string name, Version version, string culture, string publicKeyToken)
{
    // Assign all get-only properties in one deconstructing assignment.
    (Filename, Name, Version, Culture, PublicKeyToken) = (filename, name, version, culture, publicKeyToken);
}
/// <summary>
/// Get AssemblyInfo from a loaded Assembly.
/// </summary>
/// <param name="assembly">The assembly.</param>
/// <returns>Info about the assembly.</returns>
public static AssemblyInfo MakeFromAssembly(Assembly assembly) => new AssemblyInfo() { Valid = true, Filename = assembly.Location, Id = assembly.FullName };
public static AssemblyInfo MakeFromAssembly(Assembly assembly)
{
    // The full name is passed on as the id to parse, so it must be present.
    var fullName = assembly.FullName
        ?? throw new InvalidOperationException("Assembly with empty full name is not expected.");

    return new AssemblyInfo(fullName, assembly.Location);
}
/// <summary>
/// Parse an assembly name/Id into an AssemblyInfo,
/// populating the available fields and leaving the others null.
/// Returns the id and name of the assembly that would be created from the received id.
/// </summary>
/// <param name="id">The assembly name/Id.</param>
/// <returns>The deconstructed assembly info.</returns>
public static AssemblyInfo MakeFromId(string id) => new AssemblyInfo() { Valid = true, Id = id };
public static (string id, string name) ComputeSanitizedAssemblyInfo(string id)
{
    // Build a throwaway instance with an empty file name, then read back
    // the id and name it exposes.
    var parsed = new AssemblyInfo(id, string.Empty);
    var sanitizedId = parsed.Id;
    var shortName = parsed.Name;
    return (sanitizedId, shortName);
}
/// <summary>
/// Reads the assembly info from a file.
@@ -131,48 +138,42 @@ namespace Semmle.BuildAnalyser
/// <returns>The information about the assembly.</returns>
public static AssemblyInfo ReadFromFile(string filename)
{
var result = new AssemblyInfo() { Filename = filename };
try
{
/* This method is significantly faster and more lightweight than using
* System.Reflection.Assembly.ReflectionOnlyLoadFrom. It also allows
* loading the same assembly from different locations.
*/
using (var pereader = new System.Reflection.PortableExecutable.PEReader(new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.Read)))
using (var sha1 = new SHA1CryptoServiceProvider())
using var pereader = new System.Reflection.PortableExecutable.PEReader(new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.Read));
using var sha1 = new SHA1CryptoServiceProvider();
var metadata = pereader.GetMetadata();
unsafe
{
var metadata = pereader.GetMetadata();
unsafe
{
var reader = new System.Reflection.Metadata.MetadataReader(metadata.Pointer, metadata.Length);
var def = reader.GetAssemblyDefinition();
var reader = new System.Reflection.Metadata.MetadataReader(metadata.Pointer, metadata.Length);
var def = reader.GetAssemblyDefinition();
// This is how you compute the public key token from the full public key.
// The last 8 bytes of the SHA1 of the public key.
var publicKey = reader.GetBlobBytes(def.PublicKey);
var publicKeyToken = sha1.ComputeHash(publicKey);
var publicKeyString = new StringBuilder();
foreach (var b in publicKeyToken.Skip(12).Reverse())
publicKeyString.AppendFormat("{0:x2}", b);
// This is how you compute the public key token from the full public key.
// The last 8 bytes of the SHA1 of the public key.
var publicKey = reader.GetBlobBytes(def.PublicKey);
var publicKeyToken = sha1.ComputeHash(publicKey);
var publicKeyString = new StringBuilder();
foreach (var b in publicKeyToken.Skip(12).Reverse())
publicKeyString.AppendFormat("{0:x2}", b);
result.Name = reader.GetString(def.Name);
result.Version = def.Version;
result.Culture = def.Culture.IsNil ? "neutral" : reader.GetString(def.Culture);
result.PublicKeyToken = publicKeyString.ToString();
result.Valid = true;
}
var culture = def.Culture.IsNil ? "neutral" : reader.GetString(def.Culture);
return new AssemblyInfo(filename, reader.GetString(def.Name), def.Version, culture, publicKeyString.ToString());
}
}
catch (BadImageFormatException)
{
// The DLL wasn't an assembly -> result.Valid = false.
// The DLL wasn't an assembly
}
catch (InvalidOperationException)
{
// Some other failure -> result.Valid = false.
// Some other failure
}
return result;
throw new AssemblyLoadException();
}
}
}

View File

@@ -0,0 +1,6 @@
using System;
namespace Semmle.BuildAnalyser
{
    /// <summary>
    /// Exception type for failures to load assembly information.
    /// </summary>
    public class AssemblyLoadException : Exception
    {
        /// <summary>
        /// Creates the exception without a message.
        /// </summary>
        public AssemblyLoadException() { }

        /// <summary>
        /// Creates the exception with a message describing the failure.
        /// </summary>
        /// <param name="message">The description of the failure.</param>
        public AssemblyLoadException(string message) : base(message) { }

        /// <summary>
        /// Creates the exception with a message and the exception that caused it.
        /// </summary>
        /// <param name="message">The description of the failure.</param>
        /// <param name="innerException">The underlying cause.</param>
        public AssemblyLoadException(string message, Exception innerException) : base(message, innerException) { }
    }
}

View File

@@ -49,7 +49,6 @@ namespace Semmle.BuildAnalyser
class BuildAnalysis : IBuildAnalysis, IDisposable
{
private readonly AssemblyCache assemblyCache;
private readonly NugetPackages nuget;
private readonly IProgressMonitor progressMonitor;
private readonly IDictionary<string, bool> usedReferences = new ConcurrentDictionary<string, bool>();
private readonly IDictionary<string, bool> sources = new ConcurrentDictionary<string, bool>();
@@ -85,8 +84,8 @@ namespace Semmle.BuildAnalyser
{
try
{
nuget = new NugetPackages(sourceDir.FullName, PackageDirectory);
ReadNugetFiles();
var nuget = new NugetPackages(sourceDir.FullName, PackageDirectory);
nuget.InstallPackages(progressMonitor);
}
catch (FileNotFoundException)
{
@@ -175,10 +174,21 @@ namespace Semmle.BuildAnalyser
/// </summary>
void ResolveConflicts()
{
var sortedReferences = usedReferences.
Select(r => assemblyCache.GetAssemblyInfo(r.Key)).
OrderBy(r => r.Version).
ToArray();
var sortedReferences = new List<AssemblyInfo>();
foreach (var usedReference in usedReferences)
{
try
{
var assemblyInfo = assemblyCache.GetAssemblyInfo(usedReference.Key);
sortedReferences.Add(assemblyInfo);
}
catch (AssemblyLoadException)
{
progressMonitor.Log(Util.Logging.Severity.Warning, $"Could not load assembly information from {usedReference.Key}");
}
}
sortedReferences = sortedReferences.OrderBy(r => r.Version).ToList();
Dictionary<string, AssemblyInfo> finalAssemblyList = new Dictionary<string, AssemblyInfo>();
@@ -203,15 +213,6 @@ namespace Semmle.BuildAnalyser
}
}
/// <summary>
/// Find and restore NuGet packages.
/// </summary>
void ReadNugetFiles()
{
nuget.FindPackages();
nuget.InstallPackages(progressMonitor);
}
/// <summary>
/// Store that a particular reference file is used.
/// </summary>
@@ -293,15 +294,15 @@ namespace Semmle.BuildAnalyser
foreach (var @ref in csProj.References)
{
AssemblyInfo resolved = assemblyCache.ResolveReference(@ref);
if (!resolved.Valid)
try
{
AssemblyInfo resolved = assemblyCache.ResolveReference(@ref);
UseReference(resolved.Filename);
}
catch (AssemblyLoadException)
{
UnresolvedReference(@ref, project.FullName);
}
else
{
UseReference(resolved.Filename);
}
}
foreach (var src in csProj.Sources)

View File

@@ -1,4 +1,5 @@
using System.Collections.Generic;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Xml;
@@ -12,7 +13,7 @@ namespace Semmle.BuildAnalyser
{
private string Filename { get; }
private string Directory => Path.GetDirectoryName(Filename);
private string Directory { get; }
/// <summary>
/// Reads the .csproj file.
@@ -22,20 +23,29 @@ namespace Semmle.BuildAnalyser
{
Filename = filename.FullName;
var directoryName = Path.GetDirectoryName(Filename);
if (directoryName is null)
{
throw new Extraction.InternalError($"Directory of file '{Filename}' is null");
}
Directory = directoryName;
try
{
// This can fail if the .csproj is invalid or has
// unrecognised content or is the wrong version.
// This currently always fails on Linux because
// Microsoft.Build is not cross platform.
ReadMsBuildProject(filename);
(csFiles, references) = ReadMsBuildProject(filename);
}
catch // lgtm[cs/catch-of-all-exceptions]
{
// There was some reason why the project couldn't be loaded.
// Fall back to reading the Xml document directly.
// This method however doesn't handle variable expansion.
ReadProjectFileAsXml(filename);
(csFiles, references) = ReadProjectFileAsXml(filename, Directory);
}
}
@@ -45,21 +55,23 @@ namespace Semmle.BuildAnalyser
/// and there seems to be no way to make it succeed. Fails on Linux.
/// </summary>
/// <param name="filename">The file to read.</param>
private void ReadMsBuildProject(FileInfo filename)
private static (string[] csFiles, string[] references) ReadMsBuildProject(FileInfo filename)
{
var msbuildProject = new Microsoft.Build.Execution.ProjectInstance(filename.FullName);
references = msbuildProject.
var references = msbuildProject.
Items.
Where(item => item.ItemType == "Reference").
Select(item => item.EvaluatedInclude).
ToArray();
csFiles = msbuildProject.Items
var csFiles = msbuildProject.Items
.Where(item => item.ItemType == "Compile")
.Select(item => item.GetMetadataValue("FullPath"))
.Where(fn => fn.EndsWith(".cs"))
.ToArray();
return (csFiles, references);
}
/// <summary>
@@ -67,14 +79,14 @@ namespace Semmle.BuildAnalyser
/// This doesn't handle variables etc, and should only be used as a
/// fallback if ReadMsBuildProject() fails.
/// </summary>
/// <param name="filename">The .csproj file.</param>
private void ReadProjectFileAsXml(FileInfo filename)
/// <param name="fileName">The .csproj file.</param>
private static (string[] csFiles, string[] references) ReadProjectFileAsXml(FileInfo fileName, string directoryName)
{
var projFile = new XmlDocument();
var mgr = new XmlNamespaceManager(projFile.NameTable);
mgr.AddNamespace("msbuild", "http://schemas.microsoft.com/developer/msbuild/2003");
projFile.Load(filename.FullName);
var projDir = filename.Directory;
projFile.Load(fileName.FullName);
var projDir = fileName.Directory;
var root = projFile.DocumentElement;
// Figure out if it's dotnet core
@@ -83,46 +95,39 @@ namespace Semmle.BuildAnalyser
if (netCoreProjectFile)
{
var relativeCsIncludes =
root.SelectNodes("/Project/ItemGroup/Compile/@Include", mgr).
var explicitCsFiles = root.SelectNodes("/Project/ItemGroup/Compile/@Include", mgr).
NodeList().
Select(node => node.Value).
ToArray();
var explicitCsFiles = relativeCsIncludes.
Select(cs => Path.DirectorySeparatorChar == '/' ? cs.Replace("\\", "/") : cs).
Select(f => Path.GetFullPath(Path.Combine(projDir.FullName, f)));
var additionalCsFiles = System.IO.Directory.GetFiles(Directory, "*.cs", SearchOption.AllDirectories);
var additionalCsFiles = System.IO.Directory.GetFiles(directoryName, "*.cs", SearchOption.AllDirectories);
csFiles = explicitCsFiles.Concat(additionalCsFiles).ToArray();
references = new string[0];
return (explicitCsFiles.Concat(additionalCsFiles).ToArray(), Array.Empty<string>());
}
else
{
references =
root.SelectNodes("/msbuild:Project/msbuild:ItemGroup/msbuild:Reference/@Include", mgr).
NodeList().
Select(node => node.Value).
ToArray();
var references =
root.SelectNodes("/msbuild:Project/msbuild:ItemGroup/msbuild:Reference/@Include", mgr).
NodeList().
Select(node => node.Value).
ToArray();
var relativeCsIncludes =
root.SelectNodes("/msbuild:Project/msbuild:ItemGroup/msbuild:Compile/@Include", mgr).
NodeList().
Select(node => node.Value).
ToArray();
var relativeCsIncludes =
root.SelectNodes("/msbuild:Project/msbuild:ItemGroup/msbuild:Compile/@Include", mgr).
NodeList().
Select(node => node.Value).
ToArray();
csFiles = relativeCsIncludes.
Select(cs => Path.DirectorySeparatorChar == '/' ? cs.Replace("\\", "/") : cs).
Select(f => Path.GetFullPath(Path.Combine(projDir.FullName, f))).
ToArray();
}
var csFiles = relativeCsIncludes.
Select(cs => Path.DirectorySeparatorChar == '/' ? cs.Replace("\\", "/") : cs).
Select(f => Path.GetFullPath(Path.Combine(projDir.FullName, f))).
ToArray();
return (csFiles, references);
}
string[] references;
string[] csFiles;
readonly string[] references;
readonly string[] csFiles;
/// <summary>
/// The list of references as a list of assembly IDs.
@@ -145,8 +150,7 @@ namespace Semmle.BuildAnalyser
/// <returns>A more useful data type.</returns>
public static IEnumerable<XmlNode> NodeList(this XmlNodeList list)
{
foreach (var i in list)
yield return i as XmlNode;
return list.OfType<XmlNode>();
}
}
}

View File

@@ -25,24 +25,22 @@ namespace Semmle.BuildAnalyser
// Expect nuget.exe to be in a `nuget` directory under the directory containing this exe.
var currentAssembly = System.Reflection.Assembly.GetExecutingAssembly().Location;
nugetExe = Path.Combine(Path.GetDirectoryName(currentAssembly), "nuget", "nuget.exe");
string? directory = Path.GetDirectoryName(currentAssembly);
if (directory is null)
throw new FileNotFoundException($"Directory path '{currentAssembly}' of current assembly is null");
nugetExe = Path.Combine(directory, "nuget", "nuget.exe");
if (!File.Exists(nugetExe))
throw new FileNotFoundException(string.Format("NuGet could not be found at {0}", nugetExe));
}
/// <summary>
/// Locate all NuGet packages but don't download them yet.
/// </summary>
public void FindPackages()
{
packages = new DirectoryInfo(SourceDirectory).
EnumerateFiles("packages.config", SearchOption.AllDirectories).
ToArray();
}
// List of package files to download.
FileInfo[] packages;
private readonly FileInfo[] packages;
/// <summary>
/// The list of package files.

View File

@@ -120,7 +120,7 @@ namespace Semmle.Extraction.CSharp.Standalone
/// <summary>
/// The solution file to analyse, or null if not specified.
/// </summary>
public string SolutionFile;
public string? SolutionFile;
/// <summary>
/// Whether the extraction phase should be skipped (dry-run).

View File

@@ -25,51 +25,23 @@ namespace Semmle.Extraction.CSharp.Standalone
/// </summary>
class Analysis : IDisposable
{
readonly ILogger logger;
public Analysis(ILogger logger)
public Analysis(ILogger logger, Options options)
{
this.logger = logger;
var progressMonitor = new ProgressMonitor(logger);
buildAnalysis = new BuildAnalysis(options, progressMonitor);
References = buildAnalysis.ReferenceFiles;
Extraction = new Extraction(options.SrcDir);
Extraction.Sources.AddRange(options.SolutionFile == null ? buildAnalysis.AllSourceFiles : buildAnalysis.ProjectSourceFiles);
}
// The extraction configuration for the entire project.
Extraction projectExtraction;
public IEnumerable<string> References
{
get; private set;
}
public IEnumerable<string> References { get; }
/// <summary>
/// The extraction configuration.
/// </summary>
public Extraction Extraction => projectExtraction;
public Extraction Extraction { get; }
/// <summary>
/// Creates an extraction for the current directory
/// and adds it to the list of all extractions.
/// </summary>
/// <param name="dir">The directory of the extraction.</param>
/// <returns>The extraction.</returns>
void CreateExtraction(string dir)
{
projectExtraction = new Extraction(dir);
}
BuildAnalysis buildAnalysis;
/// <summary>
/// Analyse projects/solution and resolves references.
/// </summary>
/// <param name="options">The build analysis options.</param>
public void AnalyseProjects(Options options)
{
CreateExtraction(options.SrcDir);
var progressMonitor = new ProgressMonitor(logger);
buildAnalysis = new BuildAnalysis(options, progressMonitor);
References = buildAnalysis.ReferenceFiles;
projectExtraction.Sources.AddRange(options.SolutionFile == null ? buildAnalysis.AllSourceFiles : buildAnalysis.ProjectSourceFiles);
}
readonly BuildAnalysis buildAnalysis;
public void Dispose()
{
@@ -84,7 +56,6 @@ namespace Semmle.Extraction.CSharp.Standalone
var options = Options.Create(args);
// options.CIL = true; // To do: Enable this
var output = new ConsoleLogger(options.Verbosity);
using var a = new Analysis(output);
if (options.Help)
{
@@ -98,7 +69,7 @@ namespace Semmle.Extraction.CSharp.Standalone
var start = DateTime.Now;
output.Log(Severity.Info, "Running C# standalone extractor");
a.AnalyseProjects(options);
using var a = new Analysis(output, options);
int sourceFiles = a.Extraction.Sources.Count();
if (sourceFiles == 0)
@@ -117,7 +88,7 @@ namespace Semmle.Extraction.CSharp.Standalone
new ExtractionProgress(output),
new FileLogger(options.Verbosity, Extractor.GetCSharpLogPath()),
options);
output.Log(Severity.Info, $"Extraction completed in {DateTime.Now-start}");
output.Log(Severity.Info, $"Extraction completed in {DateTime.Now - start}");
}
return 0;

View File

@@ -10,6 +10,7 @@
<TreatWarningsAsErrors>false</TreatWarningsAsErrors>
<WarningsAsErrors />
<RuntimeIdentifiers>win-x64;linux-x64;osx-x64</RuntimeIdentifiers>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>

View File

@@ -57,10 +57,15 @@ namespace Semmle.BuildAnalyser
/// <summary>
/// List of projects which were mentioned but don't exist on disk.
/// </summary>
public IEnumerable<string> MissingProjects =>
public IEnumerable<string> MissingProjects
{
get
{
// Only projects in the solution file can be missing.
// (NestedProjects are located on disk so always exist.)
MsBuildProjects.Where(p => !File.Exists(p));
return MsBuildProjects.Where(p => !File.Exists(p));
}
}
/// <summary>
/// The list of project files.

View File

@@ -136,16 +136,32 @@ namespace Semmle.Extraction.CSharp.Entities
trapFile.metadata_handle(this, Location, MetadataTokens.GetToken(handle.Value));
}
/// <summary>
/// Looks up a non-public instance property by name on the runtime type
/// of the given object.
/// </summary>
/// <param name="o">The object whose runtime type is searched.</param>
/// <param name="name">The name of the property.</param>
/// <returns>The result of the reflection lookup.</returns>
static System.Reflection.PropertyInfo GetPropertyInfo(object o, string name)
{
    const System.Reflection.BindingFlags nonPublicInstanceProperty =
        System.Reflection.BindingFlags.Instance |
        System.Reflection.BindingFlags.NonPublic |
        System.Reflection.BindingFlags.GetProperty;

    return o.GetType().GetProperty(name, nonPublicInstanceProperty);
}
public Handle? MetadataHandle
{
get
{
var propertyInfo = symbol.GetType().GetProperty("Handle",
System.Reflection.BindingFlags.Instance | System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.GetProperty);
var handleProp = GetPropertyInfo(symbol, "Handle");
object handleObj = symbol;
if (propertyInfo != null)
if (handleProp is null)
{
switch (propertyInfo.GetValue(symbol))
var underlyingSymbolProp = GetPropertyInfo(symbol, "UnderlyingSymbol");
if (underlyingSymbolProp is object)
{
if (underlyingSymbolProp.GetValue(symbol) is object underlying)
{
handleProp = GetPropertyInfo(underlying, "Handle");
handleObj = underlying;
}
}
}
if (handleProp is object)
{
switch (handleProp.GetValue(handleObj))
{
case MethodDefinitionHandle md: return md;
case TypeDefinitionHandle td: return td;

View File

@@ -11,13 +11,24 @@ namespace Semmle.Extraction.CSharp.Entities
{
class NamedType : Type<INamedTypeSymbol>
{
NamedType(Context cx, INamedTypeSymbol init)
NamedType(Context cx, INamedTypeSymbol init, bool constructUnderlyingTupleType)
: base(cx, init)
{
typeArgumentsLazy = new Lazy<Type[]>(() => symbol.TypeArguments.Select(t => Create(cx, t)).ToArray());
this.constructUnderlyingTupleType = constructUnderlyingTupleType;
}
public static NamedType Create(Context cx, INamedTypeSymbol type) => NamedTypeFactory.Instance.CreateEntityFromSymbol(cx, type);
public static NamedType Create(Context cx, INamedTypeSymbol type) =>
NamedTypeFactory.Instance.CreateEntityFromSymbol(cx, type);
/// <summary>
/// Creates a named type entity from a tuple type. Unlike `Create`, this
/// will create an entity for the underlying `System.ValueTuple` struct.
/// For example, `(int, string)` will result in an entity for
/// `System.ValueTuple<int, string>`.
/// </summary>
public static NamedType CreateNamedTypeFromTupleType(Context cx, INamedTypeSymbol type)
{
    // Goes through UnderlyingTupleTypeFactory rather than NamedTypeFactory.
    return UnderlyingTupleTypeFactory.Instance.CreateEntity(cx, (new SymbolEqualityWrapper(type), typeof(TupleType)), type);
}
public override bool NeedsPopulation => base.NeedsPopulation || symbol.TypeKind == TypeKind.Error;
@@ -51,7 +62,10 @@ namespace Semmle.Extraction.CSharp.Entities
}
else
{
trapFile.constructed_generic(this, Type.Create(Context, symbol.ConstructedFrom).TypeRef);
var unbound = constructUnderlyingTupleType
? CreateNamedTypeFromTupleType(Context, symbol.ConstructedFrom)
: Type.Create(Context, symbol.ConstructedFrom);
trapFile.constructed_generic(this, unbound.TypeRef);
for (int i = 0; i < symbol.TypeArguments.Length; ++i)
{
@@ -60,7 +74,7 @@ namespace Semmle.Extraction.CSharp.Entities
}
}
PopulateType(trapFile);
PopulateType(trapFile, constructUnderlyingTupleType);
if (symbol.EnumUnderlyingType != null)
{
@@ -76,6 +90,8 @@ namespace Semmle.Extraction.CSharp.Entities
}
readonly Lazy<Type[]> typeArgumentsLazy;
private readonly bool constructUnderlyingTupleType;
public Type[] TypeArguments => typeArgumentsLazy.Value;
public override IEnumerable<Type> TypeMentions => TypeArguments;
@@ -115,7 +131,7 @@ namespace Semmle.Extraction.CSharp.Entities
trapFile.Write('*');
else
{
symbol.BuildTypeId(Context, trapFile, symbol);
symbol.BuildTypeId(Context, trapFile, symbol, constructUnderlyingTupleType);
trapFile.Write(";type");
}
}
@@ -161,7 +177,14 @@ namespace Semmle.Extraction.CSharp.Entities
{
public static readonly NamedTypeFactory Instance = new NamedTypeFactory();
public NamedType Create(Context cx, INamedTypeSymbol init) => new NamedType(cx, init);
public NamedType Create(Context cx, INamedTypeSymbol init) => new NamedType(cx, init, false);
}
/// <summary>
/// Entity factory that constructs <see cref="NamedType"/> instances with the
/// underlying-tuple-type flag set to <c>true</c>.
/// </summary>
class UnderlyingTupleTypeFactory : ICachedEntityFactory<INamedTypeSymbol, NamedType>
{
    /// <summary>
    /// The shared factory instance.
    /// </summary>
    public static readonly UnderlyingTupleTypeFactory Instance = new UnderlyingTupleTypeFactory();

    public NamedType Create(Context cx, INamedTypeSymbol init)
    {
        return new NamedType(cx, init, true);
    }
}
// Do not create typerefs of constructed generics as they are always in the current trap file.

View File

@@ -41,7 +41,8 @@ namespace Semmle.Extraction.CSharp.Entities
PopulateType(trapFile);
PopulateGenerics();
var underlyingType = NamedType.Create(Context, symbol.TupleUnderlyingType);
var underlyingType = NamedType.CreateNamedTypeFromTupleType(Context, symbol.TupleUnderlyingType ?? symbol);
trapFile.tuple_underlying_type(this, underlyingType);
int index = 0;

View File

@@ -47,7 +47,7 @@ namespace Semmle.Extraction.CSharp.Entities
symbol.ContainingType != null && ConstructedOrParentIsConstructed(symbol.ContainingType);
}
static Kinds.TypeKind GetClassType(Context cx, ITypeSymbol t)
static Kinds.TypeKind GetClassType(Context cx, ITypeSymbol t, bool constructUnderlyingTupleType)
{
switch (t.SpecialType)
{
@@ -72,7 +72,9 @@ namespace Semmle.Extraction.CSharp.Entities
{
case TypeKind.Class: return Kinds.TypeKind.CLASS;
case TypeKind.Struct:
return ((INamedTypeSymbol)t).IsTupleType ? Kinds.TypeKind.TUPLE : Kinds.TypeKind.STRUCT;
return ((INamedTypeSymbol)t).IsTupleType && !constructUnderlyingTupleType
? Kinds.TypeKind.TUPLE
: Kinds.TypeKind.STRUCT;
case TypeKind.Interface: return Kinds.TypeKind.INTERFACE;
case TypeKind.Array: return Kinds.TypeKind.ARRAY;
case TypeKind.Enum: return Kinds.TypeKind.ENUM;
@@ -85,7 +87,7 @@ namespace Semmle.Extraction.CSharp.Entities
}
}
protected void PopulateType(TextWriter trapFile)
protected void PopulateType(TextWriter trapFile, bool constructUnderlyingTupleType = false)
{
PopulateMetadataHandle(trapFile);
PopulateAttributes();
@@ -93,9 +95,9 @@ namespace Semmle.Extraction.CSharp.Entities
trapFile.Write("types(");
trapFile.WriteColumn(this);
trapFile.Write(',');
trapFile.WriteColumn((int)GetClassType(Context, symbol));
trapFile.WriteColumn((int)GetClassType(Context, symbol, constructUnderlyingTupleType));
trapFile.Write(",\"");
symbol.BuildDisplayName(Context, trapFile);
symbol.BuildDisplayName(Context, trapFile, constructUnderlyingTupleType);
trapFile.WriteLine("\")");
// Visit base types

View File

@@ -20,7 +20,7 @@
</ItemGroup>
<ItemGroup>
<PackageReference Include="Microsoft.CodeAnalysis.CSharp" Version="3.4.0" />
<PackageReference Include="Microsoft.CodeAnalysis.CSharp" Version="3.7.0" />
</ItemGroup>
</Project>

View File

@@ -117,7 +117,7 @@ namespace Semmle.Extraction.CSharp
case TypeKind.Delegate:
case TypeKind.Error:
var named = (INamedTypeSymbol)type;
if (named.IsTupleType)
if (named.IsTupleType && named.TupleUnderlyingType is object)
named = named.TupleUnderlyingType;
if (IdDependsOnImpl(named.ContainingType))
return true;
@@ -152,10 +152,11 @@ namespace Semmle.Extraction.CSharp
/// <param name="cx">The extraction context.</param>
/// <param name="trapFile">The trap builder used to store the result.</param>
/// <param name="symbolBeingDefined">The outer symbol being defined (to avoid recursive ids).</param>
public static void BuildTypeId(this ITypeSymbol type, Context cx, TextWriter trapFile, ISymbol symbolBeingDefined) =>
type.BuildTypeId(cx, trapFile, symbolBeingDefined, true);
/// <param name="constructUnderlyingTupleType">Whether to build a type ID for the underlying `System.ValueTuple` struct in the case of tuple types.</param>
public static void BuildTypeId(this ITypeSymbol type, Context cx, TextWriter trapFile, ISymbol symbolBeingDefined, bool constructUnderlyingTupleType = false) =>
type.BuildTypeId(cx, trapFile, symbolBeingDefined, true, constructUnderlyingTupleType);
static void BuildTypeId(this ITypeSymbol type, Context cx, TextWriter trapFile, ISymbol symbolBeingDefined, bool addBaseClass)
static void BuildTypeId(this ITypeSymbol type, Context cx, TextWriter trapFile, ISymbol symbolBeingDefined, bool addBaseClass, bool constructUnderlyingTupleType)
{
using (cx.StackGuard)
{
@@ -173,7 +174,7 @@ namespace Semmle.Extraction.CSharp
case TypeKind.Delegate:
case TypeKind.Error:
var named = (INamedTypeSymbol)type;
named.BuildNamedTypeId(cx, trapFile, symbolBeingDefined, addBaseClass);
named.BuildNamedTypeId(cx, trapFile, symbolBeingDefined, addBaseClass, constructUnderlyingTupleType);
return;
case TypeKind.Pointer:
var ptr = (IPointerTypeSymbol)type;
@@ -195,7 +196,7 @@ namespace Semmle.Extraction.CSharp
}
}
static void BuildOrWriteId(this ISymbol symbol, Context cx, TextWriter trapFile, ISymbol symbolBeingDefined, bool addBaseClass)
static void BuildOrWriteId(this ISymbol symbol, Context cx, TextWriter trapFile, ISymbol symbolBeingDefined, bool addBaseClass, bool constructUnderlyingTupleType = false)
{
// We need to keep track of the symbol being defined in order to avoid cyclic labels.
// For example, in
@@ -210,11 +211,13 @@ namespace Semmle.Extraction.CSharp
//
// ```
// #123 = @"C`1 : IEnumerable<__self___T>"
// ```
// ```
if (SymbolEqualityComparer.Default.Equals(symbol, symbolBeingDefined))
trapFile.Write("__self__");
else if (symbol is ITypeSymbol type && type.IdDependsOn(cx, symbolBeingDefined))
type.BuildTypeId(cx, trapFile, symbolBeingDefined, addBaseClass);
type.BuildTypeId(cx, trapFile, symbolBeingDefined, addBaseClass, constructUnderlyingTupleType);
else if (symbol is INamedTypeSymbol namedType && namedType.IsTupleType && constructUnderlyingTupleType)
trapFile.WriteSubId(NamedType.CreateNamedTypeFromTupleType(cx, namedType));
else
trapFile.WriteSubId(CreateEntity(cx, symbol));
}
@@ -262,9 +265,9 @@ namespace Semmle.Extraction.CSharp
trapFile.Write("::");
}
static void BuildNamedTypeId(this INamedTypeSymbol named, Context cx, TextWriter trapFile, ISymbol symbolBeingDefined, bool addBaseClass)
static void BuildNamedTypeId(this INamedTypeSymbol named, Context cx, TextWriter trapFile, ISymbol symbolBeingDefined, bool addBaseClass, bool constructUnderlyingTupleType)
{
if (named.IsTupleType)
if (!constructUnderlyingTupleType && named.IsTupleType)
{
trapFile.Write('(');
trapFile.BuildList(",", named.TupleElements,
@@ -308,10 +311,10 @@ namespace Semmle.Extraction.CSharp
}
else
{
named.ConstructedFrom.BuildOrWriteId(cx, trapFile, symbolBeingDefined, addBaseClass);
named.ConstructedFrom.BuildOrWriteId(cx, trapFile, symbolBeingDefined, addBaseClass, constructUnderlyingTupleType);
trapFile.Write('<');
// Encode the nullability of the type arguments in the label.
// Type arguments with different nullability can result in
// Type arguments with different nullability can result in
// a constructed type with different nullability of its members and methods,
// so we need to create a distinct database entity for it.
trapFile.BuildList(",", named.GetAnnotatedTypeArguments(),
@@ -360,7 +363,7 @@ namespace Semmle.Extraction.CSharp
/// Constructs a display name string for this type symbol.
/// </summary>
/// <param name="trapFile">The trap builder used to store the result.</param>
public static void BuildDisplayName(this ITypeSymbol type, Context cx, TextWriter trapFile)
public static void BuildDisplayName(this ITypeSymbol type, Context cx, TextWriter trapFile, bool constructUnderlyingTupleType = false)
{
using (cx.StackGuard)
{
@@ -384,7 +387,7 @@ namespace Semmle.Extraction.CSharp
case TypeKind.Delegate:
case TypeKind.Error:
var named = (INamedTypeSymbol)type;
named.BuildNamedTypeDisplayName(cx, trapFile);
named.BuildNamedTypeDisplayName(cx, trapFile, constructUnderlyingTupleType);
return;
case TypeKind.Pointer:
var ptr = (IPointerTypeSymbol)type;
@@ -403,9 +406,9 @@ namespace Semmle.Extraction.CSharp
}
}
public static void BuildNamedTypeDisplayName(this INamedTypeSymbol namedType, Context cx, TextWriter trapFile)
public static void BuildNamedTypeDisplayName(this INamedTypeSymbol namedType, Context cx, TextWriter trapFile, bool constructUnderlyingTupleType)
{
if (namedType.IsTupleType)
if (!constructUnderlyingTupleType && namedType.IsTupleType)
{
trapFile.Write('(');
trapFile.BuildList(",", namedType.TupleElements.Select(f => f.Type),

View File

@@ -15,10 +15,11 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.CodeAnalysis" Version="3.4.0" />
<PackageReference Include="GitInfo" Version="2.0.20"><IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
<PrivateAssets>all</PrivateAssets>
</PackageReference>
<PackageReference Include="Microsoft.CodeAnalysis" Version="3.7.0" />
<PackageReference Include="GitInfo" Version="2.0.20">
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
<PrivateAssets>all</PrivateAssets>
</PackageReference>
</ItemGroup>
<ItemGroup>

View File

@@ -1962,6 +1962,13 @@ class ChiInstruction extends Instruction {
* Gets the operand that represents the new value written by the memory write.
*/
final Instruction getPartial() { result = getPartialOperand().getDef() }
/**
* Holds if the partial operand of this `ChiInstruction` updates the bit range
* `[startBit, endBit)`, relative to the start address of the total operand.
*/
final predicate getUpdatedInterval(int startBit, int endBit) {
Construction::getIntervalUpdatedByChi(this, startBit, endBit)
}
}
/**

View File

@@ -328,6 +328,14 @@ class NonPhiMemoryOperand extends NonPhiOperand, MemoryOperand, NonPhiMemoryOper
not Construction::isInCycle(useInstr) and
strictcount(Construction::getMemoryOperandDefinition(useInstr, tag, _)) = 1
}
/**
* Holds if this operand totally overlaps with its definition and consumes the
* bit range `[startBitOffset, endBitOffset)` relative to the start address of the definition.
*
* The result is taken directly from `Construction::getUsedInterval`.
*/
predicate getUsedInterval(int startBitOffset, int endBitOffset) {
Construction::getUsedInterval(this, startBitOffset, endBitOffset)
}
}
/**

View File

@@ -214,6 +214,20 @@ private module Cached {
result = getMemoryOperandDefinition(instr, _, _)
}
/**
* Holds if the partial operand of `chi` updates the bit range
* `[startBit, endBit)` of the total operand.
*
* The body is `none()`, so this predicate never holds in this module.
*/
cached
predicate getIntervalUpdatedByChi(ChiInstruction chi, int startBit, int endBit) { none() }
/**
* Holds if `operand` totally overlaps with its definition and consumes the
* bit range `[startBit, endBit)`.
*
* The body is `none()`, so this predicate never holds in this module.
*/
cached
predicate getUsedInterval(Operand operand, int startBit, int endBit) { none() }
/**
* Holds if `instr` is part of a cycle in the operand graph that doesn't go
* through a phi instruction and therefore should be impossible.

View File

@@ -1962,6 +1962,13 @@ class ChiInstruction extends Instruction {
* Gets the operand that represents the new value written by the memory write.
*/
final Instruction getPartial() { result = getPartialOperand().getDef() }
/**
* Holds if the partial operand of this `ChiInstruction` updates the bit range
* `[startBit, endBit)`, relative to the start address of the total operand.
*/
final predicate getUpdatedInterval(int startBit, int endBit) {
Construction::getIntervalUpdatedByChi(this, startBit, endBit)
}
}
/**

View File

@@ -328,6 +328,14 @@ class NonPhiMemoryOperand extends NonPhiOperand, MemoryOperand, NonPhiMemoryOper
not Construction::isInCycle(useInstr) and
strictcount(Construction::getMemoryOperandDefinition(useInstr, tag, _)) = 1
}
/**
* Holds if this operand totally overlaps with its definition and consumes the
* bit range `[startBitOffset, endBitOffset)` relative to the start address of the definition.
*
* The result is taken directly from `Construction::getUsedInterval`.
*/
predicate getUsedInterval(int startBitOffset, int endBitOffset) {
Construction::getUsedInterval(this, startBitOffset, endBitOffset)
}
}
/**

View File

@@ -149,6 +149,34 @@ private module Cached {
)
}
/**
* Holds if the partial operand of `chi` updates the bit range
* `[startBitOffset, endBitOffset)` of the total operand.
*
* The range is read off the memory location that `Alias` assigns to the result of the
* old-IR instruction corresponding to the definition of the partial operand.
*/
cached
predicate getIntervalUpdatedByChi(ChiInstruction chi, int startBitOffset, int endBitOffset) {
  exists(OldInstruction partialDef, Alias::MemoryLocation written |
    partialDef = getOldInstruction(chi.getPartial()) and
    written = Alias::getResultMemoryLocation(partialDef) and
    endBitOffset = Alias::getEndBitOffset(written) and
    startBitOffset = Alias::getStartBitOffset(written)
  )
}
/**
* Holds if `operand` totally overlaps with its definition and consumes the bit range
* `[startBitOffset, endBitOffset)`.
*
* The range is read off the memory location that `Alias` assigns to a non-phi memory
* operand of the old-IR instruction that uses `operand`.
*
* NOTE(review): the total-overlap condition is not checked in this body, and any non-phi
* memory operand of the use instruction is accepted - confirm this is intended.
*/
cached
predicate getUsedInterval(NonPhiMemoryOperand operand, int startBitOffset, int endBitOffset) {
  exists(OldIR::NonPhiMemoryOperand consumed, Alias::MemoryLocation usedLocation |
    consumed = operand.getUse().(OldInstruction).getAnOperand() and
    usedLocation = Alias::getOperandMemoryLocation(consumed) and
    endBitOffset = Alias::getEndBitOffset(usedLocation) and
    startBitOffset = Alias::getStartBitOffset(usedLocation)
  )
}
/**
* Holds if `instr` is part of a cycle in the operand graph that doesn't go
* through a phi instruction and therefore should be impossible.

View File

@@ -79,3 +79,9 @@ MemoryLocation getResultMemoryLocation(Instruction instr) {
MemoryLocation getOperandMemoryLocation(MemoryOperand operand) {
result = getMemoryLocation(getAddressOperandAllocation(operand.getAddressOperand()))
}
/**
* Gets the start bit offset of `location`, if any.
*
* The body is `none()`, so this predicate has no results in this module.
*/
int getStartBitOffset(MemoryLocation location) { none() }
/**
* Gets the end bit offset of `location`, if any.
*
* The body is `none()`, so this predicate has no results in this module.
*/
int getEndBitOffset(MemoryLocation location) { none() }

View File

@@ -46,6 +46,8 @@ private predicate isNotNeeded(Element e) {
or
e instanceof AnonymousClass
or
e instanceof TupleType
or
isNotNeeded(e.(Declaration).getDeclaringType())
or
isNotNeeded(e.(Parameter).getDeclaringElement())

View File

@@ -932,7 +932,7 @@ class UnknownType extends Type, @unknown_type { }
*/
class TupleType extends ValueType, @tuple_type {
/** Gets the underlying type of this tuple, which is of type `System.ValueTuple`. */
ConstructedStruct getUnderlyingType() { tuple_underlying_type(this, getTypeRef(result)) }
Struct getUnderlyingType() { tuple_underlying_type(this, getTypeRef(result)) }
/**
* Gets the `n`th element of this tuple, indexed from 0.

View File

@@ -143,8 +143,8 @@ private predicate readonlyAccess(Access a) {
// A read-only method call
exists(MethodCall mc | mc.getQualifier() = a | mc.getTarget().hasName(readonlyMethodName()))
or
// Any property access
a = any(PropertyAccess pa).getQualifier()
// Any property read
a = any(PropertyRead pr).getQualifier()
or
// An element read
a = any(ElementRead er).getQualifier()

View File

@@ -692,7 +692,7 @@ overrides(
int base_id: @callable ref);
explicitly_implements(
unique int id: @member ref,
int id: @member ref,
int interface_id: @interface_or_ref ref);
local_functions(

View File

@@ -17,10 +17,18 @@ query predicate tooManyHandles(string s) {
)
}
/**
* A metadata entity that is neither a `TupleType` nor has a qualified name
* starting with `System.ValueTuple`.
*/
private class UniqueMetadataEntity extends MetadataEntity {
  UniqueMetadataEntity() {
    // Tuple types such as `(,)` and `ValueTuple`2` share the same handle
    not (
      this instanceof TupleType
      or
      this.getQualifiedName().matches("System.ValueTuple%")
    )
  }
}
query predicate tooManyMatchingHandles(string s) {
exists(MetadataEntity e, Assembly a, int handle |
exists(UniqueMetadataEntity e, Assembly a, int handle |
metadata_handle(e, a, handle) and
strictcount(MetadataEntity e2 | metadata_handle(e2, a, handle)) > 2 and
strictcount(UniqueMetadataEntity e2 | metadata_handle(e2, a, handle)) > 2 and
s = e.getQualifiedName()
)
}

View File

@@ -152,6 +152,21 @@ class Test
break;
}
}
void f9()
{
var l1 = new MyList(); // BAD
var x1 = l1[0];
var l2 = new MyList(); // GOOD
var x2 = l2[0];
l2.Prop = 42;
}
class MyList : List<int>
{
public int Prop { get { return 0; } set { Add(value); } }
}
}
// semmle-extractor-options: /r:System.Collections.dll

View File

@@ -8,4 +8,5 @@
| ReadOnlyContainer.cs:91:13:91:14 | v8 | The contents of this container are never initialized. |
| ReadOnlyContainer.cs:96:13:96:14 | v9 | The contents of this container are never initialized. |
| ReadOnlyContainer.cs:99:13:99:15 | v10 | The contents of this container are never initialized. |
| ReadOnlyContainer.cs:121:13:121:15 | v11 | The contents of this container are never initialized. |
| ReadOnlyContainer.cs:121:13:121:15 | v11 | The contents of this container are never initialized. |
| ReadOnlyContainer.cs:158:13:158:14 | l1 | The contents of this container are never initialized. |

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,2 @@
description: Removed uniqueness constraint from 'explicitly_implements' relation
compatibility: full

View File

@@ -43,7 +43,7 @@ public class Main {
* A version identifier that should be updated every time the extractor changes in such a way that
* it may produce different tuples for the same file under the same {@link ExtractorConfig}.
*/
public static final String EXTRACTOR_VERSION = "2020-09-02";
public static final String EXTRACTOR_VERSION = "2020-09-12";
public static final Pattern NEWLINE = Pattern.compile("\n");

View File

@@ -5,6 +5,7 @@
import Customizations
import semmle.javascript.Aliases
import semmle.javascript.AMD
import semmle.javascript.ApiGraphs
import semmle.javascript.Arrays
import semmle.javascript.AST
import semmle.javascript.BasicBlocks

View File

@@ -0,0 +1,746 @@
/**
* Provides an implementation of _API graphs_, which are an abstract representation of the API
* surface used and/or defined by a code base.
*
* The nodes of the API graph represent definitions and uses of API components. The edges are
* directed and labeled; they specify how the components represented by nodes relate to each other.
* For example, if one of the nodes represents a definition of an API function, then there
* will be nodes corresponding to the function's parameters, which are connected to the function
* node by edges labeled `parameter <i>`.
*/
import javascript
/**
* Provides classes and predicates for working with APIs defined or used in a database.
*/
module API {
/**
* An abstract representation of a definition or use of an API component such as a function
* exported by an npm package, a parameter of such a function, or its result.
*/
class Node extends Impl::TApiNode {
/**
* Gets a data-flow node corresponding to a use of the API component represented by this node.
*
* For example, `require('fs').readFileSync` is a use of the function `readFileSync` from the
* `fs` module, and `require('fs').readFileSync(file)` is a use of the result of that function.
*
* As another example, in the assignment `exports.plusOne = (x) => x+1` the two references to
* `x` are uses of the first parameter of `plusOne`.
*/
DataFlow::Node getAUse() {
exists(DataFlow::SourceNode src | Impl::use(this, src) |
Impl::trackUseNode(src).flowsTo(result)
)
}
/**
* Gets a data-flow node corresponding to the right-hand side of a definition of the API
* component represented by this node.
*
* For example, in the assignment `exports.plusOne = (x) => x+1`, the function expression
* `(x) => x+1` is the right-hand side of the definition of the member `plusOne` of
* the enclosing module, and the expression `x+1` is the right-hand side of the definition of
* its result.
*
* Note that for parameters, it is the arguments flowing into that parameter that count as
* right-hand sides of the definition, not the declaration of the parameter itself.
* Consequently, in `require('fs').readFileSync(file)`, `file` is the right-hand
* side of a definition of the first parameter of `readFileSync` from the `fs` module.
*/
DataFlow::Node getARhs() { Impl::rhs(this, result) }
/**
* Gets a node representing member `m` of this API component.
*
* For example, modules have an `exports` member representing their exports, and objects have
* their properties as members.
*/
bindingset[m]
bindingset[result]
Node getMember(string m) { result = getASuccessor(Label::member(m)) }
/**
* Gets a node representing a member of this API component where the name of the member is
* not known statically.
*/
Node getUnknownMember() { result = getASuccessor(Label::unknownMember()) }
/**
* Gets a node representing a member of this API component where the name of the member may
* or may not be known statically.
*/
Node getAMember() {
result = getASuccessor(Label::member(_)) or
result = getUnknownMember()
}
/**
* Gets a node representing an instance of this API component, that is, an object whose
* constructor is the function represented by this node.
*
* For example, if this node represents a use of some class `A`, then there might be a node
* representing instances of `A`, typically corresponding to expressions `new A()` at the
* source level.
*/
Node getInstance() { result = getASuccessor(Label::instance()) }
/**
* Gets a node representing the `i`th parameter of the function represented by this node.
*/
bindingset[i]
Node getParameter(int i) { result = getASuccessor(Label::parameter(i)) }
/**
* Gets the number of parameters of the function represented by this node.
*
* This is computed as one more than the largest parameter index for which this node has a
* parameter successor, so it has no result if there is no such successor.
*/
int getNumParameter() {
result =
max(string s | exists(getASuccessor(Label::parameterByStringIndex(s))) | s.toInt()) + 1
}
/**
* Gets a node representing the last parameter of the function represented by this node.
*/
Node getLastParameter() { result = getParameter(getNumParameter() - 1) }
/**
* Gets a node representing the receiver of the function represented by this node.
*/
Node getReceiver() { result = getASuccessor(Label::receiver()) }
/**
* Gets a node representing a parameter or the receiver of the function represented by this
* node.
*/
Node getAParameter() {
result = getASuccessor(Label::parameterByStringIndex(_)) or
result = getReceiver()
}
/**
* Gets a node representing the result of the function represented by this node.
*/
Node getReturn() { result = getASuccessor(Label::return()) }
/**
* Gets a node representing the promised value wrapped in the `Promise` object represented by
* this node.
*/
Node getPromised() { result = getASuccessor(Label::promised()) }
/**
* Gets a string representation of the lexicographically least among all shortest access paths
* from the root to this node.
*/
string getPath() { result = min(string p | p = getAPath(Impl::distanceFromRoot(this)) | p) }
/**
* Gets a node such that there is an edge in the API graph between this node and the other
* one, and that edge is labeled with `lbl`.
*/
Node getASuccessor(string lbl) { Impl::edge(this, lbl, result) }
/**
* Gets a node such that there is an edge in the API graph between that other node and
* this one, and that edge is labeled with `lbl`
*/
Node getAPredecessor(string lbl) { this = result.getASuccessor(lbl) }
/**
* Gets a node such that there is an edge in the API graph between that other node and
* this one.
*/
Node getAPredecessor() { result = getAPredecessor(_) }
/**
* Gets a node such that there is an edge in the API graph between this node and the other
* one.
*/
Node getASuccessor() { result = getASuccessor(_) }
/**
* Holds if this node may take its value from `that` node.
*
* In other words, the value of a use of `that` may flow into the right-hand side of a
* definition of this node.
*/
predicate refersTo(Node that) { this.getARhs() = that.getAUse() }
/**
* Gets the data-flow node that gives rise to this node, if any.
*
* Only class-instance, use, def and async-function-result nodes have an inducing node.
*/
DataFlow::Node getInducingNode() {
this = Impl::MkClassInstance(result) or
this = Impl::MkUse(result) or
this = Impl::MkDef(result) or
this = Impl::MkAsyncFuncResult(result)
}
/**
* Holds if this node is located in file `path` between line `startline`, column `startcol`,
* and line `endline`, column `endcol`.
*
* For nodes that do not have a meaningful location, `path` is the empty string and all other
* parameters are zero.
*/
predicate hasLocationInfo(string path, int startline, int startcol, int endline, int endcol) {
getInducingNode().hasLocationInfo(path, startline, startcol, endline, endcol)
or
// nodes without an inducing data-flow node get the dummy location
not exists(getInducingNode()) and
path = "" and
startline = 0 and
startcol = 0 and
endline = 0 and
endcol = 0
}
/**
* Gets a textual representation of this node.
*/
string toString() {
none() // defined in subclasses
}
/**
* Gets a path of the given `length` from the root to this node.
*
* The path of the root is the empty string; every other path has the form
* `(<label> <path of predecessor>)`, with the space omitted when `length` is 1.
*/
private string getAPath(int length) {
this instanceof Impl::MkRoot and
length = 0 and
result = ""
or
exists(Node pred, string lbl, string predpath |
Impl::edge(pred, lbl, this) and
lbl != "" and
predpath = pred.getAPath(length - 1) and
exists(string space | if length = 1 then space = "" else space = " " |
result = "(" + lbl + space + predpath + ")" and
// avoid producing strings longer than 1MB
result.length() < 1000 * 1000
)
) and
// never build paths longer than the distance of this node from the root
length in [1 .. Impl::distanceFromRoot(this)]
}
}
/** The root node of an API graph; its textual representation is `root`. */
class Root extends Node, Impl::MkRoot {
override string toString() { result = "root" }
}
/**
* A node corresponding to a definition of an API component.
*
* Its textual representation is `def ` followed by the node's path.
*/
class Definition extends Node, Impl::TDef {
override string toString() { result = "def " + getPath() }
}
/**
* A node corresponding to the use of an API component.
*
* Its textual representation is `use ` followed by the node's path.
*/
class Use extends Node, Impl::TUse {
override string toString() { result = "use " + getPath() }
}
/** Gets the root node, that is, any value of type `Root`. */
Root root() { any() }
/**
* Gets a node corresponding to an import of module `m`: either the import node itself
* or its `default` member.
*/
Node moduleImport(string m) {
  exists(Node imp | imp = Impl::MkModuleImport(m) |
    result = imp
    or
    result = imp.getMember("default")
  )
}
/**
* Gets a node corresponding to an export of module `m`, that is, the `exports` member of
* the definition node of `m`.
*/
Node moduleExport(string m) { result = Impl::MkModuleDef(m).(Node).getMember("exports") }
/**
* An API entry point.
*
* Extend this class to define additional API entry points other than modules.
* Typical examples include global variables.
*
* The string value of an entry point is used as the label of the edge from the root
* to the nodes for its uses and right-hand sides.
*/
abstract class EntryPoint extends string {
// the characteristic predicate accepts any string that is already bound
bindingset[this]
EntryPoint() { any() }
/** Gets a data-flow node that uses this entry point. */
abstract DataFlow::SourceNode getAUse();
/** Gets a data-flow node that defines this entry point. */
abstract DataFlow::Node getARhs();
}
/**
* Provides the actual implementation of API graphs, cached for performance.
*
* Ideally, we'd like nodes to correspond to (global) access paths, with edge labels
* corresponding to extending the access path by one element. We also want to be able to map
* nodes to their definitions and uses in the data-flow graph, and this should happen modulo
* (inter-procedural) data flow.
*
* This, however, is not easy to implement, since access paths can have unbounded length
* and we need some way of recognizing cycles to avoid non-termination. Unfortunately, expressing
* a condition like "this node hasn't been involved in constructing any predecessor of
* this node in the API graph" without negative recursion is tricky.
*
* So instead most nodes are directly associated with a data-flow node, representing
* either a use or a definition of an API component. This ensures that we only have a finite
* number of nodes. However, we can now have multiple nodes with the same access
* path, which are essentially indistinguishable for a client of the API.
*
* On the other hand, a single node can have multiple access paths (which is, of
* course, unavoidable). We pick as canonical the alphabetically least access path with
* shortest length.
*/
cached
private module Impl {
/**
* The nodes of the API graph, one branch per kind of node.
*/
cached
newtype TApiNode =
// the unique root node
MkRoot() or
// exists for exactly those module names that have an `MkModuleExport` node
MkModuleDef(string m) { exists(MkModuleExport(m)) } or
// exists for exactly those module names that have an `MkModuleImport` node
MkModuleUse(string m) { exists(MkModuleImport(m)) } or
MkModuleExport(string m) {
exists(Module mod | mod = importableModule(m) |
// exclude modules that don't actually export anything
exports(m, _)
or
exports(m, _, _)
or
// Node.js modules with an implicit initialization of `module` or `exports`
exists(NodeModule nm | nm = mod |
exists(SSA::implicitInit([nm.getModuleVariable(), nm.getExportsVariable()]))
)
)
} or
MkModuleImport(string m) { imports(_, m) } or
// classes that flow into the right-hand side of some definition
MkClassInstance(DataFlow::ClassNode cls) { cls = trackDefNode(_) and hasSemantics(cls) } or
// async functions that flow into the right-hand side of some definition
MkAsyncFuncResult(DataFlow::FunctionNode f) {
f = trackDefNode(_) and f.getFunction().isAsync() and hasSemantics(f)
} or
// data-flow nodes identified as right-hand sides by the three-argument `rhs`
MkDef(DataFlow::Node nd) { rhs(_, _, nd) } or
// data-flow nodes identified as uses by the three-argument `use`
MkUse(DataFlow::Node nd) { use(_, _, nd) } or
MkCanonicalNameDef(CanonicalName n) { isDefined(n) } or
MkCanonicalNameUse(CanonicalName n) { isUsed(n) }
/** A definition node: a module definition or any `TNonModuleDef`. */
class TDef = MkModuleDef or TNonModuleDef;
/** A definition node other than a module definition. */
class TNonModuleDef =
MkModuleExport or MkClassInstance or MkAsyncFuncResult or MkDef or MkCanonicalNameDef;
/** A use node. */
class TUse = MkModuleUse or MkModuleImport or MkUse or MkCanonicalNameUse;
/** Holds unless `nd` belongs to a top-level for which `isExterns()` holds. */
private predicate hasSemantics(DataFlow::Node nd) { not nd.getTopLevel().isExterns() }
/**
* Holds if `imp` is an import of module `m`, where `m` does not start with a dot or a
* slash and `imp` is not in an externs file.
*/
private predicate imports(DataFlow::Node imp, string m) {
  hasSemantics(imp) and
  // path must not start with a dot or a slash
  m.regexpMatch("[^./].*") and
  imp = DataFlow::moduleImport(m)
}
/**
* Gets the definition of module `m`: the main module of an npm package named `m` whose
* `package.json` is not private, provided that module is not an externs file.
*/
private Module importableModule(string m) {
  exists(NPMPackage npmPkg, PackageJSON manifest |
    manifest = npmPkg.getPackageJSON() and
    not manifest.isPrivate() and
    m = npmPkg.getPackageName() and
    result = npmPkg.getMainModule() and
    not result.isExterns()
  )
}
/** Holds if `n` is a type name or a namespace that has at least one access. */
private predicate isUsed(CanonicalName n) {
exists(n.(TypeName).getAnAccess()) or
exists(n.(Namespace).getAnAccess())
}
/** Holds if `n` is a type name or a namespace with a definition that is not ambient. */
private predicate isDefined(CanonicalName n) {
  exists(ASTNode definition | not definition.isAmbient() |
    definition = n.(Namespace).getADefinition()
    or
    definition = n.(TypeName).getADefinition()
  )
}
/**
* Holds if `rhs` is the right-hand side of a definition of a node that should have an
* incoming edge from `base` labeled `lbl` in the API graph.
*
* Nodes in externs files are never right-hand sides.
*/
cached
predicate rhs(TApiNode base, string lbl, DataFlow::Node rhs) {
hasSemantics(rhs) and
(
// right-hand sides of an entry point; the entry point itself is the edge label
base = MkRoot() and
rhs = lbl.(EntryPoint).getARhs()
or
// named exports of a module
exists(string m, string prop |
base = MkModuleExport(m) and
lbl = Label::member(prop) and
exports(m, prop, rhs)
)
or
// `pred` flows into a right-hand side `def` of `base`
exists(DataFlow::Node def, DataFlow::SourceNode pred |
rhs(base, def) and pred = trackDefNode(def)
|
// values written to a property of `pred`
exists(DataFlow::PropWrite pw | pw = pred.getAPropertyWrite() |
lbl = Label::memberFromRef(pw) and
rhs = pw.getRhs()
)
or
// values returned by `pred`, if it is a function that is not async
exists(DataFlow::FunctionNode fn | fn = pred |
not fn.getFunction().isAsync() and
lbl = Label::return() and
rhs = fn.getAReturn()
)
or
// values stored into `pred` under the promise value property
lbl = Label::promised() and
PromiseFlow::storeStep(rhs, pred, Promises::valueProp())
)
or
// instance methods of a class
exists(DataFlow::ClassNode cls, string name |
base = MkClassInstance(cls) and
lbl = Label::member(name) and
rhs = cls.getInstanceMethod(name)
)
or
// values returned by an async function are the promised value of its result
exists(DataFlow::FunctionNode f |
base = MkAsyncFuncResult(f) and
lbl = Label::promised() and
rhs = f.getAReturn()
)
or
// `invk` invokes a value reachable from a use `src` of `base`
exists(DataFlow::SourceNode src, DataFlow::InvokeNode invk |
use(base, src) and invk = trackUseNode(src).getAnInvocation()
|
// arguments define parameters
exists(int i |
lbl = Label::parameter(i) and
rhs = invk.getArgument(i)
)
or
// the receiver of a call defines the receiver
lbl = Label::receiver() and
rhs = invk.(DataFlow::CallNode).getReceiver()
)
or
// values written to a property of a value reachable from a use of `base`
exists(DataFlow::SourceNode src, DataFlow::PropWrite pw |
use(base, src) and pw = trackUseNode(src).getAPropertyWrite() and rhs = pw.getRhs()
|
lbl = Label::memberFromRef(pw)
)
)
}
/**
* Holds if `rhs` is the right-hand side of a definition of node `nd`.
*
* This covers module-export nodes, `MkDef` nodes, and canonical-name definitions of
* namespaces and functions.
*/
cached
predicate rhs(TApiNode nd, DataFlow::Node rhs) {
exists(string m | nd = MkModuleExport(m) | exports(m, rhs))
or
nd = MkDef(rhs)
or
exists(CanonicalName n | nd = MkCanonicalNameDef(n) |
rhs = n.(Namespace).getADefinition().flow() or
rhs = n.(CanonicalFunctionName).getADefinition().flow()
)
}
/**
* Holds if `ref` is a use of a node that should have an incoming edge from `base` labeled
* `lbl` in the API graph.
*
* Nodes in externs files are never uses.
*/
cached
predicate use(TApiNode base, string lbl, DataFlow::Node ref) {
hasSemantics(ref) and
(
// uses of an entry point; the entry point itself is the edge label
base = MkRoot() and
ref = lbl.(EntryPoint).getAUse()
or
// `pred` is reachable from a use `src` of `base`
exists(DataFlow::SourceNode src, DataFlow::SourceNode pred |
use(base, src) and pred = trackUseNode(src)
|
// `module.exports` is special: it is a use of a def-node, not a use-node,
// so we want to exclude it here
(base instanceof TNonModuleDef or base instanceof TUse) and
lbl = Label::memberFromRef(ref) and
ref = pred.getAPropertyRead()
or
// instantiations of `pred`
lbl = Label::instance() and
ref = pred.getAnInstantiation()
or
// invocations of `pred`
lbl = Label::return() and
ref = pred.getAnInvocation()
or
// values loaded from `pred` under the promise value property
lbl = Label::promised() and
PromiseFlow::loadStep(pred, ref, Promises::valueProp())
)
or
// parameters and receiver of a function flowing into a right-hand side of `base`
exists(DataFlow::Node def, DataFlow::FunctionNode fn |
rhs(base, def) and fn = trackDefNode(def)
|
exists(int i |
lbl = Label::parameter(i) and
ref = fn.getParameter(i)
)
or
lbl = Label::receiver() and
ref = fn.getReceiver()
)
or
// constructor parameters of a class flowing into a right-hand side of `base`
exists(DataFlow::Node def, DataFlow::ClassNode cls, int i |
rhs(base, def) and cls = trackDefNode(def)
|
lbl = Label::parameter(i) and
ref = cls.getConstructor().getParameter(i)
)
or
// nodes whose type is the type name `tn` are instances of that type name
exists(TypeName tn |
base = MkCanonicalNameUse(tn) and
lbl = Label::instance() and
ref = getANodeWithType(tn)
)
)
}
/**
* Holds if `ref` is a use of node `nd`.
*/
cached
predicate use(TApiNode nd, DataFlow::Node ref) {
// the `module` variable of a Node.js module, or the module parameter of an AMD `define`
exists(string m, Module mod | nd = MkModuleDef(m) and mod = importableModule(m) |
ref = DataFlow::ssaDefinitionNode(SSA::implicitInit(mod.(NodeModule).getModuleVariable()))
or
ref = DataFlow::parameterNode(mod.(AmdModule).getDefine().getModuleParameter())
)
or
// the `exports` variable or parameter, or a read of `exports` on a use of the module
exists(string m, Module mod | nd = MkModuleExport(m) and mod = importableModule(m) |
ref = DataFlow::ssaDefinitionNode(SSA::implicitInit(mod.(NodeModule).getExportsVariable()))
or
ref = DataFlow::parameterNode(mod.(AmdModule).getDefine().getExportsParameter())
or
exists(DataFlow::Node base | use(MkModuleDef(m), base) |
ref = trackUseNode(base).getAPropertyRead("exports")
)
)
or
// imports of the module
exists(string m |
nd = MkModuleImport(m) and
ref = DataFlow::moduleImport(m)
)
or
// receiver nodes of the class
exists(DataFlow::ClassNode cls | nd = MkClassInstance(cls) | ref = cls.getAReceiverNode())
or
nd = MkUse(ref)
or
// accesses to the canonical name
exists(CanonicalName n | nd = MkCanonicalNameUse(n) | ref.asExpr() = n.getAnAccess())
}
/** Holds if module `m` exports `rhs` as its overall export value. */
private predicate exports(string m, DataFlow::Node rhs) {
exists(Module mod | mod = importableModule(m) |
// the module expression of an AMD `define`
rhs = mod.(AmdModule).getDefine().getModuleExpr().flow()
or
// an export under the name `default`
exports(m, "default", rhs)
or
// the expression of an `ExportAssignDeclaration` in the module
exists(ExportAssignDeclaration assgn | assgn.getTopLevel() = mod |
rhs = assgn.getExpression().flow()
)
or
// an expression assigned to the exports variable of a Closure module
rhs = mod.(Closure::ClosureModule).getExportsVariable().getAnAssignedExpr().flow()
)
}
/**
 * Holds if module `m` exports `rhs` under the name `prop`.
 *
 * Only export declarations whose enclosing module is the importable module `m` are considered.
 */
private predicate exports(string m, string prop, DataFlow::Node rhs) {
  exists(ExportDeclaration decl |
    decl.getEnclosingModule() = importableModule(m) and
    (
      // a variable exported as `prop`: any expression assigned to it counts
      exists(Variable exported | decl.exportsAs(exported, prop) |
        rhs = exported.getAnAssignedExpr().flow()
      )
      or
      // the source node the declaration associates with `prop`
      rhs = decl.getSourceNode(prop)
    )
  )
}
/**
 * Gets a node reached by type-tracking forwards from `nd`, which is a use of some node,
 * where `t` is the type-tracker state at the result.
 */
private DataFlow::SourceNode trackUseNode(DataFlow::SourceNode nd, DataFlow::TypeTracker t) {
  // base case: tracking starts at the use itself
  result = nd and
  use(_, nd) and
  t.start()
  or
  // recursive case: extend an already tracked node by one tracking step
  exists(DataFlow::TypeTracker prev | result = trackUseNode(nd, prev).track(prev, t))
}
/**
* Gets a node that is inter-procedurally reachable from `nd`, which is a use of some node.
*
* Delegates to the two-argument `trackUseNode`, passing `DataFlow::TypeTracker::end()`
* as the type-tracker state.
*/
cached
DataFlow::SourceNode trackUseNode(DataFlow::SourceNode nd) {
result = trackUseNode(nd, DataFlow::TypeTracker::end())
}
/**
 * Gets a node reached by type-tracking backwards from `nd`, which is a definition of some
 * node, where `t` is the back-tracker state at the result.
 */
private DataFlow::SourceNode trackDefNode(DataFlow::Node nd, DataFlow::TypeBackTracker t) {
  // base case: tracking starts at a local source of the definition
  result = nd.getALocalSource() and
  rhs(_, nd) and
  t.start()
  or
  // recursive case: extend an already tracked node by one back-tracking step
  exists(DataFlow::TypeBackTracker prev | result = trackDefNode(nd, prev).backtrack(prev, t))
}
/**
* Gets a node that inter-procedurally flows into `nd`, which is a definition of some node.
*
* Delegates to the two-argument `trackDefNode`, passing `DataFlow::TypeBackTracker::end()`
* as the back-tracker state.
*/
cached
DataFlow::SourceNode trackDefNode(DataFlow::Node nd) {
result = trackDefNode(nd, DataFlow::TypeBackTracker::end())
}
/**
 * Gets a source node whose underlying type has the same qualified name
 * (module name plus type name) as `tn`.
 */
private DataFlow::SourceNode getANodeWithType(TypeName tn) {
  exists(string mod, string name | tn.hasQualifiedName(mod, name) |
    result.hasUnderlyingType(mod, name)
  )
}
/**
 * Holds if there is an edge from `pred` to `succ` in the API graph that is labeled with `lbl`.
 */
cached
predicate edge(TApiNode pred, string lbl, TApiNode succ) {
  // root --module path--> definition or use of the module with path `path`
  exists(string path |
    lbl = Label::mod(path) and
    pred = MkRoot()
  |
    succ = MkModuleUse(path)
    or
    succ = MkModuleDef(path)
  )
  or
  // module --member exports--> its exports (definition side) or its import (use side)
  exists(string path | lbl = Label::member("exports") |
    pred = MkModuleUse(path) and
    succ = MkModuleImport(path)
    or
    pred = MkModuleDef(path) and
    succ = MkModuleExport(path)
  )
  or
  // a labelled step from `pred` to a use node
  exists(DataFlow::SourceNode useSite | succ = MkUse(useSite) | use(pred, lbl, useSite))
  or
  // a labelled step from `pred` to a definition node
  exists(DataFlow::Node value | succ = MkDef(value) | rhs(pred, lbl, value))
  or
  // definition --instance--> the class-instance node of whatever flows into the definition
  exists(DataFlow::Node def |
    lbl = Label::instance() and
    rhs(pred, def) and
    succ = MkClassInstance(trackDefNode(def))
  )
  or
  // root --module name--> canonical name, labelled with the name's external module name
  exists(CanonicalName cn |
    lbl = Label::mod(cn.getExternalModuleName()) and
    pred = MkRoot()
  |
    succ = MkCanonicalNameDef(cn)
    or
    succ = MkCanonicalNameUse(cn)
  )
  or
  // canonical name --member child-name--> child canonical name, for every def/use combination
  exists(CanonicalName parent, CanonicalName child |
    child = parent.getAChild() and
    lbl = Label::member(child.getName())
  |
    (succ = MkCanonicalNameUse(child) or succ = MkCanonicalNameDef(child)) and
    (pred = MkCanonicalNameUse(parent) or pred = MkCanonicalNameDef(parent))
  )
  or
  // definition --return--> the async-function-result node of a function flowing into it
  exists(DataFlow::Node nd, DataFlow::FunctionNode fn |
    succ = MkAsyncFuncResult(fn) and
    lbl = Label::return() and
    fn = trackDefNode(nd) and
    pred = MkDef(nd)
  )
}
/**
 * Holds if there is an edge from `pred` to `succ` in the API graph, regardless of its label.
 */
private predicate edge(TApiNode pred, TApiNode succ) {
  exists(string lbl | edge(pred, lbl, succ))
}
/**
* Gets the shortest distance from the root to `nd` in the API graph.
*
* Computed by instantiating `shortestDistances` with `MkRoot/0` and the label-free
* `edge/2` relation.
*/
cached
int distanceFromRoot(TApiNode nd) = shortestDistances(MkRoot/0, edge/2)(_, nd, result)
}
import Label as EdgeLabel
}
/**
 * Provides predicates that build the string labels used on API-graph edges.
 */
private module Label {
  /** Gets the edge label for the module `m`. */
  bindingset[result]
  bindingset[m]
  string mod(string m) { result = "module " + m }

  /** Gets the `member` edge label for member `m`. */
  bindingset[result]
  bindingset[m]
  string member(string m) { result = "member " + m }

  /** Gets the `member` edge label for the unknown member. */
  string unknownMember() { result = "member *" }

  /**
   * Gets the `member` edge label for the given property reference.
   *
   * A reference without a known property name gets the unknown-member label; a reference
   * whose known name is rejected by the filter below gets no label at all.
   */
  string memberFromRef(DataFlow::PropRef pr) {
    not exists(pr.getPropertyName()) and
    result = unknownMember()
    or
    exists(string name |
      name = pr.getPropertyName() and
      // only consider properties with alphanumeric(-ish) names, excluding special properties
      // and properties whose names look like they are meant to be internal
      name.regexpMatch("(?!prototype$|__)[a-zA-Z_$][\\w\\-.$]*") and
      result = member(name)
    )
  }

  /** Gets the `instance` edge label. */
  string instance() { result = "instance" }

  /**
   * Gets the `parameter` edge label for the parameter `s`, where `s` is the decimal
   * rendering of a non-negative index.
   *
   * This is an internal helper predicate; use `parameter` instead.
   */
  bindingset[s]
  bindingset[result]
  string parameterByStringIndex(string s) {
    s.toInt() >= 0 and
    result = "parameter " + s
  }

  /** Gets the `parameter` edge label for the `i`th parameter. */
  bindingset[i]
  string parameter(int i) { result = parameterByStringIndex(i.toString()) }

  /** Gets the `parameter` edge label for the receiver. */
  string receiver() { result = "parameter -1" }

  /** Gets the `return` edge label. */
  string return() { result = "return" }

  /** Gets the `promised` edge label connecting a promise to its contained value. */
  string promised() { result = "promised" }
}
/**
 * A CommonJS `module` or `exports` variable, considered as a source node.
 *
 * Concretely, this is the SSA definition node for the implicit initialization of
 * either variable of some `NodeModule`.
 */
private class AdditionalSourceNode extends DataFlow::SourceNode::Range {
  AdditionalSourceNode() {
    exists(NodeModule mod, Variable special |
      special = mod.getModuleVariable()
      or
      special = mod.getExportsVariable()
    |
      this = DataFlow::ssaDefinitionNode(SSA::implicitInit(special))
    )
  }
}

View File

@@ -28,42 +28,29 @@ module SQL {
* Provides classes modelling the (API compatible) `mysql` and `mysql2` packages.
*/
private module MySql {
private DataFlow::SourceNode mysql() { result = DataFlow::moduleImport(["mysql", "mysql2"]) }
private DataFlow::CallNode createPool() { result = mysql().getAMemberCall("createPool") }
/** Gets a reference to a MySQL pool. */
private DataFlow::SourceNode pool(DataFlow::TypeTracker t) {
t.start() and
result = createPool()
or
exists(DataFlow::TypeTracker t2 | result = pool(t2).track(t2, t))
}
/** Gets a reference to a MySQL pool. */
private DataFlow::SourceNode pool() { result = pool(DataFlow::TypeTracker::end()) }
/** Gets the package name `mysql` or `mysql2`. */
API::Node mysql() { result = API::moduleImport(["mysql", "mysql2"]) }
/** Gets a call to `mysql.createConnection`. */
DataFlow::CallNode createConnection() { result = mysql().getAMemberCall("createConnection") }
API::Node createConnection() { result = mysql().getMember("createConnection").getReturn() }
/** Gets a reference to a MySQL connection instance. */
private DataFlow::SourceNode connection(DataFlow::TypeTracker t) {
t.start() and
(
result = createConnection()
or
result = pool().getAMethodCall("getConnection").getABoundCallbackParameter(0, 1)
)
/** Gets a call to `mysql.createPool`. */
API::Node createPool() { result = mysql().getMember("createPool").getReturn() }
/** Gets a data flow node that contains a freshly created MySQL connection instance. */
API::Node connection() {
result = createConnection()
or
exists(DataFlow::TypeTracker t2 | result = connection(t2).track(t2, t))
result = createPool().getMember("getConnection").getParameter(0).getParameter(1)
}
/** Gets a reference to a MySQL connection instance. */
DataFlow::SourceNode connection() { result = connection(DataFlow::TypeTracker::end()) }
/** A call to the MySql `query` method. */
private class QueryCall extends DatabaseAccess, DataFlow::MethodCallNode {
QueryCall() { this = [pool(), connection()].getAMethodCall("query") }
QueryCall() {
exists(API::Node recv | recv = createPool() or recv = connection() |
this = recv.getMember("query").getReturn().getAUse()
)
}
override DataFlow::Node getAQueryArgument() { result = getArgument(0) }
}
@@ -76,7 +63,12 @@ private module MySql {
/** A call to the `escape` or `escapeId` method that performs SQL sanitization. */
class EscapingSanitizer extends SQL::SqlSanitizer, MethodCallExpr {
EscapingSanitizer() {
this = [mysql(), pool(), connection()].getAMethodCall(["escape", "escapeId"]).asExpr() and
this =
[mysql(), createPool(), connection()]
.getMember(["escape", "escapeId"])
.getReturn()
.getAUse()
.asExpr() and
input = this.getArgument(0) and
output = this
}
@@ -87,8 +79,9 @@ private module MySql {
string kind;
Credentials() {
exists(string prop |
this = [createConnection(), createPool()].getOptionArgument(0, prop).asExpr() and
exists(API::Node call, string prop |
call in [createConnection(), createPool()] and
call.getAUse().asExpr().(CallExpr).hasOptionArgument(0, prop, this) and
(
prop = "user" and kind = "user name"
or
@@ -105,49 +98,29 @@ private module MySql {
* Provides classes modelling the `pg` package.
*/
private module Postgres {
/** Gets an expression of the form `new require('pg').Client()`. */
API::Node newClient() { result = API::moduleImport("pg").getMember("Client").getInstance() }
/** Gets a data flow node that holds a freshly created Postgres client instance. */
API::Node client() {
result = newClient()
or
// pool.connect(function(err, client) { ... })
result = newPool().getMember("connect").getParameter(0).getParameter(1)
}
/** Gets an expression that constructs a new connection pool. */
DataFlow::InvokeNode newPool() {
API::Node newPool() {
// new require('pg').Pool()
result = DataFlow::moduleImport("pg").getAConstructorInvocation("Pool")
result = API::moduleImport("pg").getMember("Pool").getInstance()
or
// new require('pg-pool')
result = DataFlow::moduleImport("pg-pool").getAnInstantiation()
result = API::moduleImport("pg-pool").getInstance()
}
/** Gets a data flow node referring to a connection pool. */
private DataFlow::SourceNode pool(DataFlow::TypeTracker t) {
t.start() and
result = newPool()
or
exists(DataFlow::TypeTracker t2 | result = pool(t2).track(t2, t))
}
/** Gets a data flow node referring to a connection pool. */
DataFlow::SourceNode pool() { result = pool(DataFlow::TypeTracker::end()) }
/** Gets a creation of a Postgres client. */
DataFlow::InvokeNode newClient() {
result = DataFlow::moduleImport("pg").getAConstructorInvocation("Client")
}
/** Gets a data flow node referring to a Postgres client. */
private DataFlow::SourceNode client(DataFlow::TypeTracker t) {
t.start() and
(
result = newClient()
or
result = pool().getAMethodCall("connect").getABoundCallbackParameter(0, 1)
)
or
exists(DataFlow::TypeTracker t2 | result = client(t2).track(t2, t))
}
/** Gets a data flow node referring to a Postgres client. */
DataFlow::SourceNode client() { result = client(DataFlow::TypeTracker::end()) }
/** A call to the Postgres `query` method. */
private class QueryCall extends DatabaseAccess, DataFlow::MethodCallNode {
QueryCall() { this = [client(), pool()].getAMethodCall("query") }
QueryCall() { this = [client(), newPool()].getMember("query").getReturn().getAUse() }
override DataFlow::Node getAQueryArgument() { result = getArgument(0) }
}
@@ -162,10 +135,14 @@ private module Postgres {
string kind;
Credentials() {
exists(string prop | this = [newClient(), newPool()].getOptionArgument(0, prop).asExpr() |
prop = "user" and kind = "user name"
or
prop = "password" and kind = prop
exists(DataFlow::InvokeNode call, string prop |
call = [client(), newPool()].getAUse() and
this = call.getOptionArgument(0, prop).asExpr() and
(
prop = "user" and kind = "user name"
or
prop = "password" and kind = prop
)
)
}
@@ -178,29 +155,18 @@ private module Postgres {
*/
private module Sqlite {
/** Gets a reference to the `sqlite3` module. */
DataFlow::SourceNode sqlite() {
result = DataFlow::moduleImport("sqlite3")
API::Node sqlite() {
result = API::moduleImport("sqlite3")
or
result = sqlite().getAMemberCall("verbose")
result = sqlite().getMember("verbose").getReturn()
}
/** Gets an expression that constructs a Sqlite database instance. */
DataFlow::SourceNode newDb() {
API::Node newDb() {
// new require('sqlite3').Database()
result = sqlite().getAConstructorInvocation("Database")
result = sqlite().getMember("Database").getInstance()
}
/** Gets a data flow node referring to a Sqlite database instance. */
private DataFlow::SourceNode db(DataFlow::TypeTracker t) {
t.start() and
result = newDb()
or
exists(DataFlow::TypeTracker t2 | result = db(t2).track(t2, t))
}
/** Gets a data flow node referring to a Sqlite database instance. */
DataFlow::SourceNode db() { result = db(DataFlow::TypeTracker::end()) }
/** A call to a Sqlite query method. */
private class QueryCall extends DatabaseAccess, DataFlow::MethodCallNode {
QueryCall() {
@@ -212,7 +178,7 @@ private module Sqlite {
meth = "prepare" or
meth = "run"
|
this = db().getAMethodCall(meth)
this = newDb().getMember(meth).getReturn().getAUse()
)
}
@@ -230,30 +196,24 @@ private module Sqlite {
*/
private module MsSql {
/** Gets a reference to the `mssql` module. */
DataFlow::SourceNode mssql() { result = DataFlow::moduleImport("mssql") }
API::Node mssql() { result = API::moduleImport("mssql") }
/** Gets a data flow node referring to a request object. */
private DataFlow::SourceNode request(DataFlow::TypeTracker t) {
t.start() and
(
// new require('mssql').Request()
result = mssql().getAConstructorInvocation("Request")
or
// request.input(...)
result = request().getAMethodCall("input")
)
/** Gets an expression that creates a request object. */
API::Node request() {
// new require('mssql').Request()
result = mssql().getMember("Request").getInstance()
or
exists(DataFlow::TypeTracker t2 | result = request(t2).track(t2, t))
// request.input(...)
result = request().getMember("input").getReturn()
}
/** Gets a data flow node referring to a request object. */
DataFlow::SourceNode request() { result = request(DataFlow::TypeTracker::end()) }
/** A tagged template evaluated as a query. */
private class QueryTemplateExpr extends DatabaseAccess, DataFlow::ValueNode {
override TaggedTemplateExpr astNode;
QueryTemplateExpr() { mssql().getAPropertyRead("query").flowsToExpr(astNode.getTag()) }
QueryTemplateExpr() {
mssql().getMember("query").getAUse() = DataFlow::valueNode(astNode.getTag())
}
override DataFlow::Node getAQueryArgument() {
result = DataFlow::valueNode(astNode.getTemplate().getAnElement())
@@ -262,7 +222,7 @@ private module MsSql {
/** A call to a MsSql query method. */
private class QueryCall extends DatabaseAccess, DataFlow::MethodCallNode {
QueryCall() { this = request().getAMethodCall(["query", "batch"]) }
QueryCall() { this = request().getMember(["query", "batch"]).getReturn().getAUse() }
override DataFlow::Node getAQueryArgument() { result = getArgument(0) }
}
@@ -292,9 +252,9 @@ private module MsSql {
Credentials() {
exists(DataFlow::InvokeNode call, string prop |
(
call = mssql().getAMemberCall("connect")
call = mssql().getMember("connect").getReturn().getAUse()
or
call = mssql().getAConstructorInvocation("ConnectionPool")
call = mssql().getMember("ConnectionPool").getInstance().getAUse()
) and
this = call.getOptionArgument(0, prop).asExpr() and
(
@@ -313,26 +273,17 @@ private module MsSql {
* Provides classes modelling the `sequelize` package.
*/
private module Sequelize {
/** Gets a node referring to an instance of the `Sequelize` class. */
private DataFlow::SourceNode sequelize(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::moduleImport("sequelize").getAnInstantiation()
or
exists(DataFlow::TypeTracker t2 | result = sequelize(t2).track(t2, t))
}
/** Gets an import of the `sequelize` module. */
API::Node sequelize() { result = API::moduleImport("sequelize") }
/** Gets a node referring to an instance of the `Sequelize` class. */
DataFlow::SourceNode sequelize() { result = sequelize(DataFlow::TypeTracker::end()) }
/** Gets an expression that creates an instance of the `Sequelize` class. */
API::Node newSequelize() { result = sequelize().getInstance() }
/** A call to `Sequelize.query`. */
private class QueryCall extends DatabaseAccess, DataFlow::ValueNode {
override MethodCallExpr astNode;
private class QueryCall extends DatabaseAccess, DataFlow::MethodCallNode {
QueryCall() { this = newSequelize().getMember("query").getReturn().getAUse() }
QueryCall() { this = sequelize().getAMethodCall("query") }
override DataFlow::Node getAQueryArgument() {
result = DataFlow::valueNode(astNode.getArgument(0))
}
override DataFlow::Node getAQueryArgument() { result = getArgument(0) }
}
/** An expression that is passed to `Sequelize.query` method and hence interpreted as SQL. */
@@ -349,7 +300,7 @@ private module Sequelize {
Credentials() {
exists(NewExpr ne, string prop |
ne = sequelize().asExpr() and
ne = newSequelize().getAUse().asExpr() and
(
this = ne.getArgument(1) and prop = "username"
or
@@ -376,69 +327,36 @@ private module Spanner {
/**
* Gets a node that refers to the `Spanner` class
*/
DataFlow::SourceNode spanner() {
API::Node spanner() {
// older versions
result = DataFlow::moduleImport("@google-cloud/spanner")
result = API::moduleImport("@google-cloud/spanner")
or
// newer versions
result = DataFlow::moduleMember("@google-cloud/spanner", "Spanner")
result = API::moduleImport("@google-cloud/spanner").getMember("Spanner")
}
/** Gets a data flow node referring to the result of `Spanner()` or `new Spanner()`. */
private DataFlow::SourceNode spannerNew(DataFlow::TypeTracker t) {
t.start() and
result = spanner().getAnInvocation()
or
exists(DataFlow::TypeTracker t2 | result = spannerNew(t2).track(t2, t))
/**
* Gets a node that refers to an instance of the `Database` class.
*/
API::Node database() {
result =
spanner().getReturn().getMember("instance").getReturn().getMember("database").getReturn()
}
/** Gets a data flow node referring to the result of `Spanner()` or `new Spanner()`. */
DataFlow::SourceNode spannerNew() { result = spannerNew(DataFlow::TypeTracker::end()) }
/** Gets a data flow node referring to the result of `.instance()`. */
private DataFlow::SourceNode instance(DataFlow::TypeTracker t) {
t.start() and
result = spannerNew().getAMethodCall("instance")
or
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
/**
* Gets a node that refers to an instance of the `v1.SpannerClient` class.
*/
API::Node v1SpannerClient() {
result = spanner().getMember("v1").getMember("SpannerClient").getInstance()
}
/** Gets a data flow node referring to the result of `.instance()`. */
DataFlow::SourceNode instance() { result = instance(DataFlow::TypeTracker::end()) }
/** Gets a node that refers to an instance of the `Database` class. */
private DataFlow::SourceNode database(DataFlow::TypeTracker t) {
t.start() and
result = instance().getAMethodCall("database")
or
exists(DataFlow::TypeTracker t2 | result = database(t2).track(t2, t))
/**
* Gets a node that refers to a transaction object.
*/
API::Node transaction() {
result = database().getMember("runTransaction").getParameter(0).getParameter(1)
}
/** Gets a node that refers to an instance of the `Database` class. */
DataFlow::SourceNode database() { result = database(DataFlow::TypeTracker::end()) }
/** Gets a node that refers to an instance of the `v1.SpannerClient` class. */
private DataFlow::SourceNode v1SpannerClient(DataFlow::TypeTracker t) {
t.start() and
result = spanner().getAPropertyRead("v1").getAPropertyRead("SpannerClient").getAnInstantiation()
or
exists(DataFlow::TypeTracker t2 | result = v1SpannerClient(t2).track(t2, t))
}
/** Gets a node that refers to an instance of the `v1.SpannerClient` class. */
DataFlow::SourceNode v1SpannerClient() { result = v1SpannerClient(DataFlow::TypeTracker::end()) }
/** Gets a node that refers to a transaction object. */
private DataFlow::SourceNode transaction(DataFlow::TypeTracker t) {
t.start() and
result = database().getAMethodCall("runTransaction").getABoundCallbackParameter(0, 1)
or
exists(DataFlow::TypeTracker t2 | result = transaction(t2).track(t2, t))
}
/** Gets a node that refers to a transaction object. */
DataFlow::SourceNode transaction() { result = transaction(DataFlow::TypeTracker::end()) }
/**
* A call to a Spanner method that executes a SQL query.
*/
@@ -460,7 +378,8 @@ private module Spanner {
*/
class DatabaseRunCall extends SqlExecution {
DatabaseRunCall() {
this = database().getAMethodCall(["run", "runPartitionedUpdate", "runStream"])
this =
database().getMember(["run", "runPartitionedUpdate", "runStream"]).getReturn().getAUse()
}
}
@@ -468,7 +387,9 @@ private module Spanner {
* A call to `Transaction.run`, `Transaction.runStream` or `Transaction.runUpdate`.
*/
class TransactionRunCall extends SqlExecution {
TransactionRunCall() { this = transaction().getAMethodCall(["run", "runStream", "runUpdate"]) }
TransactionRunCall() {
this = transaction().getMember(["run", "runStream", "runUpdate"]).getReturn().getAUse()
}
}
/**
@@ -476,7 +397,8 @@ private module Spanner {
*/
class ExecuteSqlCall extends SqlExecution {
ExecuteSqlCall() {
this = v1SpannerClient().getAMethodCall(["executeSql", "executeStreamingSql"])
this =
v1SpannerClient().getMember(["executeSql", "executeStreamingSql"]).getReturn().getAUse()
}
override DataFlow::Node getAQueryArgument() {

View File

@@ -5,6 +5,50 @@
import javascript
/**
 * Holds if member `fn` of package `mod` is modelled as a command-executing function with
 * command argument position `cmdArg` and options argument position `optionsArg`, where
 * `shell` is the shell flag recorded for it (presumably whether the command is interpreted
 * by a shell - confirm against `SystemCommandExecutors`).
 *
 * A negative `optionsArg` counts from the end of the argument list, as described on the
 * `optionsArg` field of `SystemCommandExecutors`.
 */
private predicate execApi(string mod, string fn, int cmdArg, int optionsArg, boolean shell) {
  // every modelled member takes the command first and the options last
  cmdArg = 0 and
  optionsArg = -1 and
  (
    mod = "cross-spawn" and
    fn = "sync" and
    shell = false
    or
    mod = "execa" and
    (
      shell = false and
      fn in ["node", "shell", "shellSync", "stdout", "stderr", "sync"]
      or
      shell = true and
      fn in ["command", "commandSync"]
    )
  )
}
/**
 * Holds if package `mod` itself (rather than one of its members) is modelled as a
 * command-executing function with command argument position `cmdArg` and options argument
 * position `optionsArg`, where `shell` is the shell flag recorded for it.
 *
 * A negative `optionsArg` counts from the end of the argument list, as described on the
 * `optionsArg` field of `SystemCommandExecutors`.
 */
private predicate execApi(string mod, int cmdArg, int optionsArg, boolean shell) {
  // every modelled package takes the command as its first argument
  cmdArg = 0 and
  (
    shell = false and
    optionsArg = -1 and
    mod in ["cross-spawn", "cross-spawn-async", "exec-async", "execa"]
    or
    shell = true and
    optionsArg = -2 and
    mod = "exec"
  )
}
private class SystemCommandExecutors extends SystemCommandExecution, DataFlow::InvokeNode {
int cmdArg;
int optionsArg; // either a positive number representing the n'th argument, or a negative number representing the n'th last argument (e.g. -2 is the second last argument).
@@ -12,61 +56,19 @@ private class SystemCommandExecutors extends SystemCommandExecution, DataFlow::I
boolean sync;
SystemCommandExecutors() {
exists(string mod, DataFlow::SourceNode callee |
exists(string method |
mod = "cross-spawn" and method = "sync" and cmdArg = 0 and shell = false and optionsArg = -1
or
mod = "execa" and
optionsArg = -1 and
(
shell = false and
(
method = "shell" or
method = "shellSync" or
method = "stdout" or
method = "stderr" or
method = "sync"
)
or
shell = true and
(method = "command" or method = "commandSync")
) and
cmdArg = 0
or
mod = "execa" and
method = "node" and
cmdArg = 0 and
optionsArg = 1 and
shell = false
|
callee = DataFlow::moduleMember(mod, method) and
sync = getSync(method)
exists(string mod |
exists(string fn |
execApi(mod, fn, cmdArg, optionsArg, shell) and
sync = getSync(fn) and
this = API::moduleImport(mod).getMember(fn).getReturn().getAUse()
)
or
execApi(mod, cmdArg, optionsArg, shell) and
sync = false and
(
shell = false and
(
mod = "cross-spawn" and cmdArg = 0 and optionsArg = -1
or
mod = "cross-spawn-async" and cmdArg = 0 and optionsArg = -1
or
mod = "exec-async" and cmdArg = 0 and optionsArg = -1
or
mod = "execa" and cmdArg = 0 and optionsArg = -1
)
or
shell = true and
mod = "exec" and
optionsArg = -2 and
cmdArg = 0
) and
callee = DataFlow::moduleImport(mod)
|
this = callee.getACall()
this = API::moduleImport(mod).getReturn().getAUse()
)
or
this = DataFlow::moduleImport("foreground-child").getACall() and
this = API::moduleImport("foreground-child").getReturn().getAUse() and
cmdArg = 0 and
optionsArg = 1 and
shell = false and
@@ -110,19 +112,19 @@ private class RemoteCommandExecutor extends SystemCommandExecution, DataFlow::In
int cmdArg;
RemoteCommandExecutor() {
this = DataFlow::moduleImport("remote-exec").getACall() and
this = API::moduleImport("remote-exec").getReturn().getAUse() and
cmdArg = 1
or
exists(DataFlow::SourceNode ssh2, DataFlow::SourceNode client |
ssh2 = DataFlow::moduleImport("ssh2") and
(client = ssh2 or client = ssh2.getAPropertyRead("Client")) and
this = client.getAnInstantiation().getAMethodCall("exec") and
exists(API::Node ssh2, API::Node client |
ssh2 = API::moduleImport("ssh2") and
client in [ssh2, ssh2.getMember("Client")] and
this = client.getInstance().getMember("exec").getReturn().getAUse() and
cmdArg = 0
)
or
exists(DataFlow::SourceNode ssh2stream |
ssh2stream = DataFlow::moduleMember("ssh2-streams", "SSH2Stream") and
this = ssh2stream.getAnInstantiation().getAMethodCall("exec") and
exists(API::Node ssh2stream |
ssh2stream = API::moduleImport("ssh2-streams").getMember("SSH2Stream") and
this = ssh2stream.getInstance().getMember("exec").getReturn().getAUse() and
cmdArg = 1
)
}
@@ -137,7 +139,7 @@ private class RemoteCommandExecutor extends SystemCommandExecution, DataFlow::In
}
private class Opener extends SystemCommandExecution, DataFlow::InvokeNode {
Opener() { this = DataFlow::moduleImport("opener").getACall() }
Opener() { this = API::moduleImport("opener").getReturn().getAUse() }
override DataFlow::Node getACommandArgument() { result = getOptionArgument(1, "command") }

View File

@@ -126,7 +126,7 @@ abstract class RateLimiter extends Express::RouteHandlerExpr { }
*/
class ExpressRateLimit extends RateLimiter {
ExpressRateLimit() {
DataFlow::moduleImport("express-rate-limit").getAnInvocation().flowsToExpr(this)
this = API::moduleImport("express-rate-limit").getReturn().getAUse().asExpr()
}
}
@@ -135,11 +135,7 @@ class ExpressRateLimit extends RateLimiter {
*/
class BruteForceRateLimit extends RateLimiter {
BruteForceRateLimit() {
exists(DataFlow::ModuleImportNode expressBrute, DataFlow::SourceNode prevent |
expressBrute.getPath() = "express-brute" and
prevent = expressBrute.getAnInstantiation().getAPropertyRead("prevent") and
prevent.flowsToExpr(this)
)
this = API::moduleImport("express-brute").getInstance().getMember("prevent").getAUse().asExpr()
}
}
@@ -148,11 +144,8 @@ class BruteForceRateLimit extends RateLimiter {
*/
class RouteHandlerLimitedByExpressLimiter extends RateLimitedRouteHandlerExpr {
RouteHandlerLimitedByExpressLimiter() {
exists(DataFlow::ModuleImportNode expressLimiter |
expressLimiter.getPath() = "express-limiter" and
expressLimiter.getACall().getArgument(0).getALocalSource().asExpr() =
this.getSetup().getRouter()
)
API::moduleImport("express-limiter").getParameter(0).getARhs().getALocalSource().asExpr() =
this.getSetup().getRouter()
}
}
@@ -175,14 +168,14 @@ class RouteHandlerLimitedByExpressLimiter extends RateLimitedRouteHandlerExpr {
class RateLimiterFlexibleRateLimiter extends DataFlow::FunctionNode {
RateLimiterFlexibleRateLimiter() {
exists(
string rateLimiterClassName, DataFlow::SourceNode rateLimiterClass,
DataFlow::SourceNode rateLimiterInstance, DataFlow::ParameterNode request
string rateLimiterClassName, API::Node rateLimiterClass, API::Node rateLimiterConsume,
DataFlow::ParameterNode request
|
rateLimiterClassName.matches("RateLimiter%") and
rateLimiterClass = DataFlow::moduleMember("rate-limiter-flexible", rateLimiterClassName) and
rateLimiterInstance = rateLimiterClass.getAnInstantiation() and
rateLimiterClass = API::moduleImport("rate-limiter-flexible").getMember(rateLimiterClassName) and
rateLimiterConsume = rateLimiterClass.getInstance().getMember("consume") and
request.getParameter() = getRouteHandlerParameter(getFunction(), "request") and
request.getAPropertyRead() = rateLimiterInstance.getAMemberCall("consume").getAnArgument()
request.getAPropertyRead().flowsTo(rateLimiterConsume.getAParameter().getARhs())
)
}
}

View File

@@ -0,0 +1,117 @@
/**
* A test query that verifies assertions about the API graph embedded in source-code comments.
*
* An assertion is a comment of the form `def <path>` or `use <path>`, where `<path>` is a
* path from the root of the API graph written in s-expression syntax. It asserts that a
* def/use feature is reachable from the root along that path, and that the data-flow node
* associated with the feature starts on the same line as the comment.
*
* We also support negative assertions of the form `!def <path>` or `!use <path>`, which assert
* that there _isn't_ a node with the given path on the same line.
*
* The query only produces output for failed assertions, meaning that it should have no output
* under normal circumstances.
*
* Note that this query file isn't itself meant to be run as a test; instead, the `.qlref`s
* referring to it from inside the individual test directories should be run. However, when
* all tests are run this test will also be run, hence we need to check in a (somewhat nonsensical)
* `.expected` file for it as well.
*/
import javascript
/**
 * Gets a data-flow node associated with `nd`: one of its uses if `kind` is `"use"`,
 * or one of its right-hand sides if `kind` is `"def"`.
 */
private DataFlow::Node getNode(API::Node nd, string kind) {
  kind = "use" and
  result = nd.getAUse()
  or
  kind = "def" and
  result = nd.getARhs()
}
/**
 * Gets the location of `nd` rendered as `<file path>:<start line>`.
 */
private string getLoc(DataFlow::Node nd) {
  exists(string file, int line | nd.hasLocationInfo(file, line, _, _, _) |
    result = file + ":" + line
  )
}
/**
 * An assertion matching a data-flow node against an API-graph feature.
 *
 * This is a comment whose trimmed text has the form `def ...` or `use ...`,
 * optionally prefixed by `!` to negate it.
 */
class Assertion extends Comment {
  /** The captured negation prefix: `"!"` for a negative assertion, `""` otherwise. */
  string polarity;
  /** The captured kind of feature the assertion talks about: `"def"` or `"use"`. */
  string expectedKind;
  /** The location of this comment, as `<absolute file path>:<start line>`. */
  string expectedLoc;

  Assertion() {
    exists(string body, string pattern |
      pattern = "(!?)(def|use) .*" and
      body = this.getText().trim()
    |
      expectedKind = body.regexpCapture(pattern, 2) and
      polarity = body.regexpCapture(pattern, 1) and
      expectedLoc = this.getFile().getAbsolutePath() + ":" + this.getLocation().getStartLine()
    )
  }

  /**
   * Gets the `i`th edge label of the asserted path, that is, the trimmed text following
   * the `i`th opening parenthesis (counting from zero) up to the next parenthesis.
   */
  string getEdgeLabel(int i) {
    exists(string raw | raw = this.getText().regexpFind("(?<=\\()[^()]+", i, _) |
      result = raw.trim()
    )
  }

  /** Gets the number of edge labels in the asserted path. */
  int getPathLength() { result = 1 + max(int i | exists(this.getEdgeLabel(i))) }

  /**
   * Gets an API-graph node reached from the root by following the edge labels with
   * indices `getPathLength() - 1` down to `i`, in that order.
   *
   * The innermost (last) label of the s-expression is followed first, so `lookup(0)`
   * is a node denoted by the whole path.
   */
  API::Node lookup(int i) {
    result = this.lookup(i + 1).getASuccessor(this.getEdgeLabel(i))
    or
    i = this.getPathLength() and
    result = API::root()
  }

  /** Holds if this assertion is prefixed with `!`. */
  predicate isNegative() { polarity = "!" }

  /**
   * Holds if some node denoted by the asserted path has a data-flow node of the
   * expected kind that starts on the line of this comment.
   */
  predicate holds() { expectedLoc = getLoc(getNode(this.lookup(0), expectedKind)) }

  /** Gets a human-readable explanation of why this assertion does not hold, if one is found. */
  string tryExplainFailure() {
    // the path lookup gets stuck: `nd` is reachable, but nothing below it is
    exists(int i, API::Node nd, string head, string tail |
      nd = this.lookup(i) and
      i > 0 and
      not exists(this.lookup([0 .. i - 1])) and
      head = nd + " has no outgoing edge labelled " + this.getEdgeLabel(i - 1) + ";" and
      (
        exists(nd.getASuccessor()) and
        tail =
          "it does have outgoing edges labelled " +
            concat(string lbl | exists(nd.getASuccessor(lbl)) | lbl, ", ") + "."
        or
        not exists(nd.getASuccessor()) and
        tail = "it has no outgoing edges at all."
      )
    |
      result = head + " " + tail
    )
    or
    // the path resolves, but only to a node of a different kind
    exists(API::Node nd, string kind | nd = this.lookup(0) |
      exists(getNode(nd, kind)) and
      not exists(getNode(nd, expectedKind)) and
      result = "Expected " + expectedKind + " node, but found " + kind + " node."
    )
    or
    // the path resolves to a node of the right kind, but at another location
    exists(DataFlow::Node nd | nd = getNode(this.lookup(0), expectedKind) |
      not getLoc(nd) = expectedLoc and
      result = "Node not found on this line (but there is one on line " + min(getLoc(nd)) + ")."
    )
  }

  /**
   * Gets an explanation of why this assertion failed; has no result if it did not fail.
   *
   * A negative assertion fails when `holds()` is true; a positive one fails when it is not.
   */
  string explainFailure() {
    this.isNegative() and
    this.holds() and
    result = "Negative assertion failed."
    or
    not this.isNegative() and
    not this.holds() and
    (
      result = this.tryExplainFailure()
      or
      not exists(this.tryExplainFailure()) and
      result = "Positive assertion failed for unknown reasons."
    )
  }
}
/** Holds if assertion `a` failed, with `explanation` describing the failure. */
query predicate failed(Assertion a, string explanation) {
  a.explainFailure() = explanation
}

View File

@@ -0,0 +1 @@
import ApiGraphs.VerifyAssertions

View File

@@ -0,0 +1,6 @@
const assert = require("assert");
let o = {
foo: 23 /* def (member foo (parameter 0 (member equal (member exports (module assert))))) */
};
assert.equal(o, o);

View File

@@ -0,0 +1,3 @@
{
"name": "argprops"
}

View File

@@ -0,0 +1 @@
import ApiGraphs.VerifyAssertions

View File

@@ -0,0 +1,5 @@
const fs = require('fs-extra');
module.exports.foo = async function foo() {
return await fs.copy('/tmp/myfile', '/tmp/mynewfile'); /* use (promised (return (member copy (member exports (module fs-extra))))) */ /* def (promised (return (member foo (member exports (module async-await))))) */
};

View File

@@ -0,0 +1,6 @@
{
"name": "async-await",
"dependencies": {
"fs-extra": "*"
}
}

View File

@@ -0,0 +1 @@
import ApiGraphs.VerifyAssertions

View File

@@ -0,0 +1,7 @@
const fs = require('fs');
exports.foo = function (cb) {
if (!cb)
cb = function () { };
cb(fs.readFileSync("/etc/passwd")); /* def (parameter 0 (parameter 0 (member foo (member exports (module branching-flow))))) */
};

View File

@@ -0,0 +1,3 @@
{
"name": "branching-flow"
}

View File

@@ -0,0 +1 @@
import ApiGraphs.VerifyAssertions

View File

@@ -0,0 +1,23 @@
// Fixture: two constructor functions that inherit from `EventEmitter` through
// `util.inherits`; only `MyOtherStream` is exported.
// `MyStream` and its arrow-function `write` carry no annotations. The two
// annotated lines of `MyOtherStream` each expect an instance of the exported
// `MyOtherStream` to be found on that line.
const util = require('util');
const EventEmitter = require('events');
function MyStream() {
EventEmitter.call(this);
}
util.inherits(MyStream, EventEmitter);
MyStream.prototype.write = (data) => this.emit('data', data);
function MyOtherStream() { /* use (instance (member MyOtherStream (member exports (module classes)))) */
EventEmitter.call(this);
}
util.inherits(MyOtherStream, EventEmitter);
MyOtherStream.prototype.write = function (data) { /* use (instance (member MyOtherStream (member exports (module classes)))) */
this.emit('data', data);
return this;
};
module.exports.MyOtherStream = MyOtherStream;

View File

@@ -0,0 +1,4 @@
{
"name": "classes",
"main": "./classes.js"
}

View File

@@ -0,0 +1 @@
import ApiGraphs.VerifyAssertions

View File

@@ -0,0 +1,5 @@
// Fixture: an exported class whose constructor takes a single parameter.
// The annotation on the constructor line expects parameter 0 of the exported
// member `A` to be found there.
export class A {
constructor(x) { /* use (parameter 0 (member A (member exports (module ctor-arg)))) */
console.log(x);
}
}

View File

@@ -0,0 +1,3 @@
{
"name": "ctor-arg"
}

View File

@@ -0,0 +1,9 @@
/**
 * An API-graph entry point named `CustomEntryPoint`, whose uses are references
 * to the global variable of the same name.
 */
class CustomEntryPoint extends API::EntryPoint {
  CustomEntryPoint() { "CustomEntryPoint" = this }

  /** Gets a reference to the global variable `CustomEntryPoint`. */
  override DataFlow::SourceNode getAUse() {
    DataFlow::globalVarRef("CustomEntryPoint") = result
  }

  /** Has no results: this entry point contributes no right-hand-side nodes. */
  override DataFlow::Node getARhs() { none() }
}
import ApiGraphs.VerifyAssertions

View File

@@ -0,0 +1 @@
// Fixture: reads property `foo` of the global `CustomEntryPoint`. The annotation
// expects that read to be member `foo` of the `CustomEntryPoint` entry point.
module.exports = CustomEntryPoint.foo; /* use (member foo (CustomEntryPoint)) */

View File

@@ -0,0 +1,3 @@
{
"name": "custom-entry-point"
}

View File

@@ -0,0 +1 @@
import ApiGraphs.VerifyAssertions

View File

@@ -0,0 +1,4 @@
// Fixture: inside a loop, `foo` is repeatedly replaced by its own `foo`
// property. The assignment line is annotated both as member `foo` of the
// module's exports and as member `foo` of that member, one level deeper.
// NOTE(review): `foo` is declared `const` yet reassigned, which would throw a
// TypeError if the loop body ever ran. Presumably irrelevant because the file
// is only analysed statically; confirm before changing the declaration.
const foo = require("foo");
while(foo)
foo = foo.foo; /* use (member foo (member exports (module foo))) */ /* use (member foo (member foo (member exports (module foo)))) */

View File

@@ -0,0 +1,6 @@
{
"name": "cyclic",
"dependencies": {
"foo": "*"
}
}

View File

@@ -0,0 +1 @@
import ApiGraphs.VerifyAssertions

Some files were not shown because too many files have changed in this diff Show More