Reorganise modelling of string concatenation.

This commit is contained in:
Max Schaefer
2020-01-24 15:57:35 +00:00
parent 6e4880bc53
commit 63ca382a0c
14 changed files with 461 additions and 71 deletions

View File

@@ -4,60 +4,6 @@
import go
module StringConcatenation {
/** Gets the `n`th operand to the string concatenation defining `node`. */
DataFlow::Node getOperand(DataFlow::Node node, int n) {
node.getType() instanceof StringType and
exists(DataFlow::BinaryOperationNode add | add = node and add.getOperator() = "+" |
n = 0 and result = add.getLeftOperand()
or
n = 1 and result = add.getRightOperand()
)
}
/** Gets an operand to the string concatenation defining `node`. */
DataFlow::Node getAnOperand(DataFlow::Node node) { result = getOperand(node, _) }
/** Gets the number of operands to the given concatenation. */
int getNumOperand(DataFlow::Node node) { result = strictcount(getAnOperand(node)) }
/** Gets the first operand to the string concatenation defining `node`. */
DataFlow::Node getFirstOperand(DataFlow::Node node) { result = getOperand(node, 0) }
/** Gets the last operand to the string concatenation defining `node`. */
DataFlow::Node getLastOperand(DataFlow::Node node) {
result = getOperand(node, getNumOperand(node) - 1)
}
/**
* Holds if `src` flows to `dst` through the `n`th operand of the given concatenation operator.
*/
predicate taintStep(DataFlow::Node src, DataFlow::Node dst, DataFlow::Node operator, int n) {
src = getOperand(dst, n) and
operator = dst
}
/**
* Holds if there is a taint step from `src` to `dst` through string concatenation.
*/
predicate taintStep(DataFlow::Node src, DataFlow::Node dst) { taintStep(src, dst, _, _) }
/**
* Holds if `node` is the root of a concatenation tree, that is,
* it is a concatenation operator that is not itself the immediate operand to
* another concatenation operator.
*/
predicate isRoot(DataFlow::Node node) {
exists(getAnOperand(node)) and
not node = getAnOperand(_)
}
/**
* Gets the root of the concatenation tree in which `node` is an operand or operator.
*/
DataFlow::Node getRoot(DataFlow::Node node) { isRoot(result) and node = getAnOperand*(result) }
}
module StringOps {
/**
* An expression that is equivalent to `strings.HasPrefix(A, B)` or `!strings.HasPrefix(A, B)`.
@@ -196,4 +142,323 @@ module StringOps {
override boolean getPolarity() { result = expr.getPolarity() }
}
}
/**
 * A data-flow node at which a string concatenation is performed.
 *
 * This is the client-facing half of the model: extend it to refine the behaviour of
 * concatenations that are already modelled. To teach the library about a new kind of
 * concatenation, extend `StringOps::Concatenation::Range` instead.
 */
class Concatenation extends DataFlow::Node {
  // the underlying `Range` instance that every member predicate delegates to
  Concatenation::Range self;

  Concatenation() { self = this }

  /**
   * Gets the data-flow node for the `n`th operand of this concatenation, if it has one.
   */
  DataFlow::Node getOperand(int n) { self.getOperand(n) = result }

  /**
   * Gets the constant string value of the `n`th operand of this concatenation, if it has one.
   */
  string getOperandStringValue(int n) { self.getOperandStringValue(n) = result }

  /**
   * Gets the number of operands of this concatenation.
   */
  int getNumOperand() { self.getNumOperand() = result }
}
module Concatenation {
/**
 * A data-flow node at which a string concatenation is performed.
 *
 * This is the extension point for modelling new APIs. To refine the behaviour of
 * concatenations that are already modelled, extend `StringOps::Concatenation` instead.
 */
abstract class Range extends DataFlow::Node {
  /**
   * Gets the data-flow node for the `n`th operand of this concatenation, if it has one.
   */
  abstract DataFlow::Node getOperand(int n);

  /**
   * Gets the constant string value of the `n`th operand of this concatenation, if it has one.
   *
   * By default this is the string value of the operand's data-flow node.
   */
  string getOperandStringValue(int n) {
    exists(DataFlow::Node operand | operand = this.getOperand(n) |
      result = operand.getStringValue()
    )
  }

  /**
   * Gets the number of operands of this concatenation.
   *
   * By default this is the number of distinct operand nodes.
   */
  int getNumOperand() { result = count(DataFlow::Node operand | operand = this.getOperand(_)) }
}
/** A binary operation with operator `+` (as written, or via `+=`) whose result is a string. */
private class PlusConcat extends Range, DataFlow::BinaryOperationNode {
  PlusConcat() {
    this.getOperator() = "+" and
    this.getType() instanceof StringType
  }

  /** Gets the left operand for `n = 0` and the right operand for `n = 1`. */
  override DataFlow::Node getOperand(int n) {
    result = this.getLeftOperand() and n = 0
    or
    result = this.getRightOperand() and n = 1
  }
}
/**
 * Gets a regular expression for matching simple format-string components, including flags,
 * width and precision specifiers, but not including `*` specifiers or explicit argument
 * indices.
 *
 * A component is either a maximal literal run (no `%` other than `%%` pairs) or a single
 * verb of the shape `%[flag][width][.precision]operator`.
 */
private string getFormatComponentRegex() {
  exists(string literal, string opt_flag, string opt_width, string operator, string verb |
    literal = "([^%]|%%)+" and
    opt_flag = "[-+ #0]?" and
    // Width and precision must be spelled out as digits. A `*` takes its value from the
    // argument list, so such a verb consumes more than one argument; accepting it here
    // would break the one-argument-per-verb assumption used to map components to arguments.
    opt_width = "(\\d*(\\.\\d*)?)?" and
    operator = "[bcdeEfFgGoOpqstTxXUv]" and
    verb = "(%" + opt_flag + opt_width + operator + ")"
  |
    result = "(" + literal + "|" + verb + ")"
  )
}
/**
 * A call to `fmt.Sprintf`, considered as a string concatenation.
 *
 * Only calls with simple format strings (no `*` specifiers, no explicit argument indices)
 * are supported. Such format strings can be viewed as sequences of literal and non-literal
 * components. A literal component contains no `%` characters except `%%` pairs,
 * while a non-literal component consists of `%`, a verb, and possibly flags and specifiers.
 * Each non-literal component consumes exactly one argument. Note that two non-literal
 * components may be adjacent (as in `"%s%s"`), so components do not necessarily alternate.
 *
 * Literal components give rise to concatenation operands that have a string value but no
 * data-flow node; non-literal `%s` or `%v` components give rise to concatenation operands
 * that do have an associated data-flow node but possibly no string value; any other non-literal
 * components give rise to concatenation operands that have neither an associated data-flow
 * node nor a string value. This is because verbs like `%q` perform additional string
 * transformations that we cannot easily represent.
 */
private class SprintfConcat extends Range, DataFlow::CallNode {
  // the constant format string passed as the first argument
  string fmt;

  SprintfConcat() {
    exists(Function sprintf | sprintf.hasQualifiedName("fmt", "Sprintf") |
      this = sprintf.getACall() and
      fmt = getArgument(0).getStringValue() and
      // the whole format string must decompose into supported components
      fmt.regexpMatch(getFormatComponentRegex() + "*")
    )
  }

  /**
   * Gets the `n`th component of this format string.
   */
  private string getComponent(int n) {
    result = fmt.regexpFind(getFormatComponentRegex(), n, _)
  }

  /**
   * Holds if the `n`th component of this format string is a non-literal component,
   * that is, it starts with a `%` that is not part of a `%%` pair.
   */
  private predicate isVerbComponent(int n) {
    exists(string cmp | cmp = getComponent(n) |
      cmp.charAt(0) = "%" and
      cmp.charAt(1) != "%"
    )
  }

  override DataFlow::Node getOperand(int n) {
    exists(int i, string part | part = "%s" or part = "%v" |
      part = getComponent(n) and
      // The argument consumed by component `n` is determined by how many non-literal
      // components precede it. This cannot be derived from `n` alone, since literal and
      // non-literal components need not alternate.
      i = count(int j | j < n and isVerbComponent(j)) and
      // argument 0 is the format string itself
      result = getArgument(i + 1)
    )
  }

  override string getOperandStringValue(int n) {
    result = Range.super.getOperandStringValue(n)
    or
    // literal components denote themselves, with `%%` unescaped to `%`
    exists(string cmp | cmp = getComponent(n) and not isVerbComponent(n) |
      result = cmp.replaceAll("%%", "%")
    )
  }

  // Counts the components instead of taking the maximum index, so that an empty format
  // string has zero operands rather than no operand count at all.
  override int getNumOperand() { result = count(int i | exists(getComponent(i))) }
}
/**
 * Holds if `dst` is the concatenation `cat` and `src` is its `n`th operand, so that
 * taint flows from `src` to `dst`.
 */
predicate taintStep(DataFlow::Node src, DataFlow::Node dst, Concatenation cat, int n) {
  cat = dst and
  cat.getOperand(n) = src
}
/**
 * Holds if `src` is some operand of the string concatenation `dst`, so that taint
 * flows from `src` to `dst`.
 */
predicate taintStep(DataFlow::Node src, DataFlow::Node dst) {
  exists(Concatenation cat, int n | taintStep(src, dst, cat, n))
}
}
newtype TConcatenationElement =
  /** A concatenation that does not occur as an operand of any other concatenation. */
  MkConcatenationRoot(Concatenation cat) {
    not exists(Concatenation parent | cat = parent.getOperand(_))
  } or
  /** The `i`th operand of the concatenation `parent`, whether or not it has a data-flow node. */
  MkConcatenationOperand(Concatenation parent, int i) {
    exists(int numOperand | numOperand = parent.getNumOperand() | i in [0 .. numOperand - 1])
  }
/**
 * A node in a string-concatenation tree: either something that performs a string
 * concatenation itself, or something that occurs as an operand of one (or both).
 *
 * For example, the expression `x + y + z` contains the following concatenation
 * elements:
 *
 * - The leaf elements `x`, `y`, and `z`
 * - The intermediate element `x + y`, which is both a concatenation and an operand
 * - The root element `x + y + z`
 */
class ConcatenationElement extends TConcatenationElement {
  /**
   * Gets the data-flow node that this element corresponds to, if there is one.
   */
  DataFlow::Node asNode() {
    this = MkConcatenationRoot(result)
    or
    exists(Concatenation cat, int idx |
      this = MkConcatenationOperand(cat, idx) and
      result = cat.getOperand(idx)
    )
  }

  /**
   * Gets the constant string value of this element, if it has one.
   */
  string getStringValue() {
    result = this.asNode().getStringValue()
    or
    exists(Concatenation cat, int idx |
      this = MkConcatenationOperand(cat, idx) and
      result = cat.getOperandStringValue(idx)
    )
  }

  /**
   * Gets the `n`th operand of this element, viewed as a string concatenation.
   */
  ConcatenationOperand getOperand(int n) { result = MkConcatenationOperand(this.asNode(), n) }

  /**
   * Gets any operand of this element, viewed as a string concatenation.
   */
  ConcatenationOperand getAnOperand() { result = this.getOperand(_) }

  /**
   * Gets the number of operands of this element, viewed as a string concatenation.
   */
  int getNumOperand() { result = count(this.getAnOperand()) }

  /**
   * Gets the operand at index 0 of this string concatenation.
   *
   * For example, the first operand of `(x + y) + z` is `(x + y)`.
   */
  ConcatenationOperand getFirstOperand() { result = this.getOperand(0) }

  /**
   * Gets the operand at the highest index of this string concatenation.
   *
   * For example, the last operand of `x + (y + z)` is `(y + z)`.
   */
  ConcatenationOperand getLastOperand() { result = this.getOperand(this.getNumOperand() - 1) }

  /**
   * Gets the root of the concatenation tree that contains this element.
   */
  ConcatenationRoot getConcatenationRoot() { result.getAnOperand*() = this }

  /**
   * Gets a leaf of the concatenation tree rooted at this element.
   */
  ConcatenationLeaf getALeaf() { result = this.getAnOperand*() }

  /**
   * Gets the leaf reached by repeatedly descending into the first operand.
   *
   * For example, the first leaf of `(x + y) + z` is `x`.
   */
  ConcatenationLeaf getFirstLeaf() { result = this.getFirstOperand*() }

  /**
   * Gets the leaf reached by repeatedly descending into the last operand.
   *
   * For example, the last leaf of `x + (y + z)` is `z`.
   */
  ConcatenationLeaf getLastLeaf() { result = this.getLastOperand*() }

  /**
   * Gets a textual representation of this concatenation element: that of its data-flow
   * node if it has one, otherwise its string value, otherwise a fixed placeholder.
   */
  string toString() {
    result = this.asNode().toString()
    or
    not exists(this.asNode()) and
    (
      result = this.getStringValue()
      or
      not exists(this.getStringValue()) and
      result = "concatenation element"
    )
  }

  /**
   * Holds if this element is at the specified location.
   * The location spans column `startcolumn` of line `startline` to
   * column `endcolumn` of line `endline` in file `filepath`.
   * For more information, see
   * [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
   */
  predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    this.asNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
    or
    // elements without a data-flow node get an all-zero location in the empty file path
    not exists(this.asNode()) and
    filepath = "" and
    startline = 0 and
    endline = 0 and
    startcolumn = 0 and
    endcolumn = 0
  }
}
/**
* One of the operands in a string concatenation.
*
* This covers every operand position of a concatenation, including positions
* that have a string value but no corresponding data-flow node.
*
* See `ConcatenationElement` for more information.
*/
class ConcatenationOperand extends ConcatenationElement, MkConcatenationOperand { }
/**
* A data-flow node that performs a string concatenation, and is not an
* immediate operand in a larger string concatenation.
*
* For example, in `(x + y) + z` the only root is the whole expression;
* `x + y` is an operand and therefore not a root.
*
* See `ConcatenationElement` for more information.
*/
class ConcatenationRoot extends ConcatenationElement, MkConcatenationRoot { }
/**
* An operand to a concatenation that is not itself a concatenation.
*
* See `ConcatenationElement` for more information.
*/
class ConcatenationLeaf extends ConcatenationOperand {
ConcatenationLeaf() { not exists(getAnOperand()) }
/**
* Gets the leaf immediately preceding this one in the concatenation tree.
*
* The two leaves need not share a parent: the result is the last leaf of the
* operand just before the operand whose first leaf is this one.
*
* For example, in `(x + y) + z`, the previous leaf for `z` is `y`.
*/
ConcatenationLeaf getPreviousLeaf() {
exists(ConcatenationElement parent, int i |
result = parent.getOperand(i - 1).getLastLeaf() and
this = parent.getOperand(i).getFirstLeaf()
)
}
/**
* Gets the leaf immediately succeeding this one in the concatenation tree.
*
* For example, in `(x + y) + z`, the next leaf for `y` is `z`.
*/
ConcatenationLeaf getNextLeaf() { this = result.getPreviousLeaf() }
}
}

View File

@@ -7,12 +7,17 @@
import go
/**
* Holds if the given data flow node refers to a string that ends with a slash.
* Holds if the string value of `cat` prevents anything appended after it
* from affecting the hostname or path of a URL.
*
* Specifically, this holds if the string contains `?` or `#`.
*/
private predicate endsWithSlash(DataFlow::Node nd) {
nd.getStringValue().matches("%/")
or
endsWithSlash(StringConcatenation::getLastOperand(nd))
private predicate concatenationHasSanitizingSubstring(StringOps::ConcatenationElement cat) {
exists(StringOps::ConcatenationLeaf lf | lf = cat.getALeaf() |
lf.getStringValue().regexpMatch(".*[?#].*")
or
hasSanitizingSubstring(lf.asNode().getAPredecessor())
)
}
/**
@@ -22,9 +27,9 @@ private predicate endsWithSlash(DataFlow::Node nd) {
* Specifically, this holds if the string contains `?` or `#`.
*/
private predicate hasSanitizingSubstring(DataFlow::Node nd) {
nd.getStringValue().regexpMatch(".*[?#].*")
or
hasSanitizingSubstring(StringConcatenation::getAnOperand(nd))
exists(StringOps::ConcatenationElement cat | nd = cat.asNode() |
concatenationHasSanitizingSubstring(cat)
)
or
hasSanitizingSubstring(nd.getAPredecessor())
}
@@ -36,9 +41,21 @@ private predicate hasSanitizingSubstring(DataFlow::Node nd) {
* This is considered as a sanitizing edge for the URL redirection queries.
*/
predicate sanitizingPrefixEdge(DataFlow::Node source, DataFlow::Node sink) {
exists(DataFlow::Node operator, int n |
StringConcatenation::taintStep(source, sink, operator, n) and
hasSanitizingSubstring(StringConcatenation::getOperand(operator, [0 .. n - 1]))
exists(StringOps::ConcatenationElement cat, int n |
StringOps::Concatenation::taintStep(source, sink, cat.asNode(), n) and
concatenationHasSanitizingSubstring(cat.getOperand([0 .. n - 1]))
)
}
/**
* Holds if the string value of `cat` prevents anything appended after it
* from affecting the hostname of a URL.
*/
private predicate concatenationHasHostnameSanitizingSubstring(StringOps::ConcatenationElement cat) {
exists(StringOps::ConcatenationLeaf lf | lf = cat.getALeaf() |
lf.getStringValue().regexpMatch(".*([?#]|[^?#:/\\\\][/\\\\]).*|[/\\\\][^/\\\\].*")
or
hasHostnameSanitizingSubstring(lf.asNode())
)
}
@@ -56,9 +73,9 @@ predicate sanitizingPrefixEdge(DataFlow::Node source, DataFlow::Node sink) {
* the `//` separating the (optional) scheme from the hostname.
*/
predicate hasHostnameSanitizingSubstring(DataFlow::Node nd) {
nd.getStringValue().regexpMatch(".*([?#]|[^?#:/\\\\][/\\\\]).*|[/\\\\][^/\\\\].*")
or
hasHostnameSanitizingSubstring(StringConcatenation::getAnOperand(nd))
exists(StringOps::ConcatenationElement cat | cat.asNode() = nd |
concatenationHasHostnameSanitizingSubstring(cat)
)
or
hasHostnameSanitizingSubstring(nd.getAPredecessor())
}
@@ -70,8 +87,8 @@ predicate hasHostnameSanitizingSubstring(DataFlow::Node nd) {
* This is considered as a sanitizing edge for the URL redirection queries.
*/
predicate hostnameSanitizingPrefixEdge(DataFlow::Node source, DataFlow::Node sink) {
exists(DataFlow::Node operator, int n |
StringConcatenation::taintStep(source, sink, operator, n) and
hasHostnameSanitizingSubstring(StringConcatenation::getOperand(operator, [0 .. n - 1]))
exists(StringOps::ConcatenationElement cat, int n |
StringOps::Concatenation::taintStep(source, sink, cat.asNode(), n) and
concatenationHasHostnameSanitizingSubstring(cat.getOperand([0 .. n - 1]))
)
}

View File

@@ -0,0 +1,29 @@
| file://:0:0:0:0 | and |
| file://:0:0:0:0 | ! |
| file://:0:0:0:0 | ' |
| file://:0:0:0:0 | ' |
| file://:0:0:0:0 | , quoted: |
| file://:0:0:0:0 | Here are |
| file://:0:0:0:0 | concatenation element |
| main.go:6:14:6:15 | s1 |
| main.go:6:14:6:20 | ...+... |
| main.go:6:14:6:25 | ...+... |
| main.go:6:19:6:20 | s2 |
| main.go:6:24:6:25 | s3 |
| main.go:7:14:7:15 | s1 |
| main.go:7:14:7:27 | ...+... |
| main.go:7:20:7:21 | s2 |
| main.go:7:20:7:26 | ...+... |
| main.go:7:25:7:26 | s3 |
| main.go:8:14:8:27 | ...+... |
| main.go:8:15:8:16 | s1 |
| main.go:8:15:8:21 | ...+... |
| main.go:8:20:8:21 | s2 |
| main.go:8:26:8:27 | s3 |
| main.go:9:2:9:24 | call to Sprintf |
| main.go:9:22:9:23 | s1 |
| main.go:10:2:10:43 | call to Sprintf |
| main.go:10:37:10:38 | s1 |
| main.go:10:41:10:42 | s2 |
| main.go:11:2:11:38 | call to Sprintf |
| main.go:11:32:11:33 | s1 |

View File

@@ -0,0 +1,4 @@
import go
from StringOps::ConcatenationElement elt
select elt

View File

@@ -0,0 +1,22 @@
| main.go:6:14:6:15 | s1 | main.go:6:14:6:15 | s1 |
| main.go:6:14:6:20 | ...+... | main.go:6:14:6:20 | ...+... |
| main.go:6:14:6:25 | ...+... | main.go:6:14:6:25 | ...+... |
| main.go:6:19:6:20 | s2 | main.go:6:19:6:20 | s2 |
| main.go:6:24:6:25 | s3 | main.go:6:24:6:25 | s3 |
| main.go:7:14:7:15 | s1 | main.go:7:14:7:15 | s1 |
| main.go:7:14:7:27 | ...+... | main.go:7:14:7:27 | ...+... |
| main.go:7:20:7:21 | s2 | main.go:7:20:7:21 | s2 |
| main.go:7:20:7:26 | ...+... | main.go:7:20:7:26 | ...+... |
| main.go:7:25:7:26 | s3 | main.go:7:25:7:26 | s3 |
| main.go:8:14:8:27 | ...+... | main.go:8:14:8:27 | ...+... |
| main.go:8:15:8:16 | s1 | main.go:8:15:8:16 | s1 |
| main.go:8:15:8:21 | ...+... | main.go:8:15:8:21 | ...+... |
| main.go:8:20:8:21 | s2 | main.go:8:20:8:21 | s2 |
| main.go:8:26:8:27 | s3 | main.go:8:26:8:27 | s3 |
| main.go:9:2:9:24 | call to Sprintf | main.go:9:2:9:24 | call to Sprintf |
| main.go:9:22:9:23 | s1 | main.go:9:22:9:23 | s1 |
| main.go:10:2:10:43 | call to Sprintf | main.go:10:2:10:43 | call to Sprintf |
| main.go:10:37:10:38 | s1 | main.go:10:37:10:38 | s1 |
| main.go:10:41:10:42 | s2 | main.go:10:41:10:42 | s2 |
| main.go:11:2:11:38 | call to Sprintf | main.go:11:2:11:38 | call to Sprintf |
| main.go:11:32:11:33 | s1 | main.go:11:32:11:33 | s1 |

View File

@@ -0,0 +1,4 @@
import go
from StringOps::ConcatenationElement elt
select elt, elt.asNode()

View File

@@ -0,0 +1,23 @@
| main.go:6:14:6:20 | ...+... | 0 | main.go:6:14:6:15 | s1 |
| main.go:6:14:6:20 | ...+... | 1 | main.go:6:19:6:20 | s2 |
| main.go:6:14:6:25 | ...+... | 0 | main.go:6:14:6:20 | ...+... |
| main.go:6:14:6:25 | ...+... | 1 | main.go:6:24:6:25 | s3 |
| main.go:7:14:7:27 | ...+... | 0 | main.go:7:14:7:15 | s1 |
| main.go:7:14:7:27 | ...+... | 1 | main.go:7:20:7:26 | ...+... |
| main.go:7:20:7:26 | ...+... | 0 | main.go:7:20:7:21 | s2 |
| main.go:7:20:7:26 | ...+... | 1 | main.go:7:25:7:26 | s3 |
| main.go:8:14:8:27 | ...+... | 0 | main.go:8:15:8:21 | ...+... |
| main.go:8:14:8:27 | ...+... | 1 | main.go:8:26:8:27 | s3 |
| main.go:8:15:8:21 | ...+... | 0 | main.go:8:15:8:16 | s1 |
| main.go:8:15:8:21 | ...+... | 1 | main.go:8:20:8:21 | s2 |
| main.go:9:2:9:24 | call to Sprintf | 0 | file://:0:0:0:0 | ' |
| main.go:9:2:9:24 | call to Sprintf | 1 | main.go:9:22:9:23 | s1 |
| main.go:9:2:9:24 | call to Sprintf | 2 | file://:0:0:0:0 | ' |
| main.go:10:2:10:43 | call to Sprintf | 0 | file://:0:0:0:0 | Here are |
| main.go:10:2:10:43 | call to Sprintf | 1 | main.go:10:37:10:38 | s1 |
| main.go:10:2:10:43 | call to Sprintf | 2 | file://:0:0:0:0 | and |
| main.go:10:2:10:43 | call to Sprintf | 3 | main.go:10:41:10:42 | s2 |
| main.go:10:2:10:43 | call to Sprintf | 4 | file://:0:0:0:0 | ! |
| main.go:11:2:11:38 | call to Sprintf | 0 | main.go:11:32:11:33 | s1 |
| main.go:11:2:11:38 | call to Sprintf | 1 | file://:0:0:0:0 | , quoted: |
| main.go:11:2:11:38 | call to Sprintf | 2 | file://:0:0:0:0 | concatenation element |

View File

@@ -0,0 +1,4 @@
import go
from StringOps::ConcatenationElement elt, int i
select elt, i, elt.getOperand(i)

View File

@@ -0,0 +1,6 @@
| file://:0:0:0:0 | and | and |
| file://:0:0:0:0 | ! | ! |
| file://:0:0:0:0 | ' | ' |
| file://:0:0:0:0 | ' | ' |
| file://:0:0:0:0 | , quoted: | , quoted: |
| file://:0:0:0:0 | Here are | Here are |

View File

@@ -0,0 +1,4 @@
import go
from StringOps::ConcatenationElement elt
select elt, elt.getStringValue()

View File

@@ -0,0 +1,12 @@
package main
import "fmt"
func test(s1, s2, s3 string, i int) {
fmt.Println(s1 + s2 + s3)
fmt.Println(s1 + (s2 + s3))
fmt.Println((s1 + s2) + s3)
fmt.Sprintf("'%s'", s1)
fmt.Sprintf("Here are %s and %s!", s1, s2)
fmt.Sprintf("%v, quoted: %q", s1, s1)
}