Merge pull request #208 from max/incomplete-url-scheme-check

Add `IncompleteUrlSchemeCheck` query
This commit is contained in:
Sauyon Lee
2020-01-08 00:50:58 -08:00
committed by GitHub Enterprise
18 changed files with 447 additions and 68 deletions

View File

@@ -10,6 +10,7 @@
|---------------------------------------------------------------------------|----------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------|
| Constant length comparison (`go/constant-length-comparison`) | correctness | Highlights code that checks the length of an array or slice against a constant before indexing it using a variable, suggesting a logic error. Results are shown on LGTM by default. |
| Impossible interface nil check (`go/impossible-interface-nil-check`) | correctness | Highlights code that compares an interface value that cannot be `nil` to `nil`, suggesting a logic error. Results are shown on LGTM by default. |
| Incomplete URL scheme check (`go/incomplete-url-scheme-check`) | correctness, security, external/cwe/cwe-020 | Highlights checks for `javascript` URLs that do not take `data` or `vbscript` URLs into account. Results are shown on LGTM by default. |
## Changes to existing queries

View File

@@ -0,0 +1,11 @@
package main
import "net/url"
func sanitizeUrl(urlstr string) string {
u, err := url.Parse(urlstr)
if err != nil || u.Scheme == "javascript" {
return "about:blank"
}
return urlstr
}

View File

@@ -0,0 +1,42 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
URLs with the special scheme <code>javascript</code> can be used to encode JavaScript code to be
executed when the URL is visited. While this is a powerful mechanism for creating feature-rich and
responsive web applications, it is also a potential security risk: if the URL comes from an
untrusted source, it might contain harmful JavaScript code. For this reason, many frameworks and
libraries first check the URL scheme of any untrusted URL, and reject URLs with the
<code>javascript</code> scheme.
</p>
<p>
However, the <code>data</code> and <code>vbscript</code> schemes can be used to represent
executable code in a very similar way, so any validation logic that checks against
<code>javascript</code>, but not against <code>data</code> and <code>vbscript</code>, is likely
to be insufficient.
</p>
</overview>
<recommendation>
<p>
Add checks covering both <code>data:</code> and <code>vbscript:</code>.
</p>
</recommendation>
<example>
<p>
The following function validates a (presumably untrusted) URL <code>urlstr</code>. If its scheme is
<code>javascript</code>, the harmless placeholder URL <code>about:blank</code> is returned to
prevent code injection; otherwise <code>urlstr</code> itself is returned.
</p>
<sample src="IncompleteUrlSchemeCheck.go"/>
<p>
While this check provides partial projection, it should be extended to cover <code>data</code>
and <code>vbscript</code> as well:
</p>
<sample src="IncompleteUrlSchemeCheckGood.go"/>
</example>
<references>
<li>WHATWG: <a href="https://wiki.whatwg.org/wiki/URL_schemes">URL schemes</a>.</li>
</references>
</qhelp>

View File

@@ -0,0 +1,60 @@
/**
* @name Incomplete URL scheme check
* @description Checking for the "javascript:" URL scheme without also checking for "vbscript:"
* and "data:" suggests a logic error or even a security vulnerability.
* @kind problem
* @problem.severity warning
* @precision high
* @id go/incomplete-url-scheme-check
* @tags security
* correctness
* external/cwe/cwe-020
*/
import go
/** A URL scheme that can be used to represent executable code. */
class DangerousScheme extends string {
DangerousScheme() { this = "data" or this = "javascript" or this = "vbscript" }
}
/** Gets a data-flow node that checks an instance of `g` against the given `scheme`. */
DataFlow::Node schemeCheck(GVN g, DangerousScheme scheme) {
// check of the form `nd.Scheme == scheme`
exists(NamedType url, DataFlow::FieldReadNode fr, DataFlow::Node s |
url.hasQualifiedName("net/url", "URL") and
fr.readsField(g.getANode(), url.getField("Scheme")) and
s.getStringValue() = scheme and
result.(DataFlow::EqualityTestNode).eq(_, fr, s)
)
or
// check of the form `strings.HasPrefix(nd, "scheme:")`
exists(StringOps::HasPrefix hasPrefix | result = hasPrefix |
hasPrefix.getBaseString() = g.getANode() and
hasPrefix.getSubstring().getStringValue() = scheme + ":"
)
or
// propagate through various string transformations
exists(string stringop, DataFlow::CallNode c |
stringop = "Map" or
stringop.matches("Replace%") or
stringop.matches("To%") or
stringop.matches("Trim%")
|
c.getTarget().hasQualifiedName("strings", stringop) and
c.getAnArgument() = g.getANode() and
result = schemeCheck(globalValueNumber(c), scheme)
)
}
/**
* Gets a scheme that `g`, which is checked against at least one scheme, is not checked against.
*/
DangerousScheme getAMissingScheme(GVN g) {
exists(schemeCheck(g, _)) and
not exists(schemeCheck(g, result))
}
from GVN g
select schemeCheck(g, "javascript"),
"This check does not consider " + strictconcat(getAMissingScheme(g), " and ") + "."

View File

@@ -0,0 +1,11 @@
package main
import "net/url"
func sanitizeUrlGod(urlstr string) string {
u, err := url.Parse(urlstr)
if err != nil || u.Scheme == "javascript" || u.Scheme == "data" || u.Scheme == "vbscript" {
return "about:blank"
}
return urlstr
}

View File

@@ -13,7 +13,7 @@ import semmle.go.Locations
import semmle.go.Packages
import semmle.go.Scopes
import semmle.go.Stmt
import semmle.go.StringConcatenation
import semmle.go.StringOps
import semmle.go.Types
import semmle.go.controlflow.BasicBlocks
import semmle.go.controlflow.ControlFlowGraph

View File

@@ -1,59 +0,0 @@
/**
* Provides predicates for analyzing string concatenations and their operands.
*/
import go
module StringConcatenation {
/** Gets the `n`th operand to the string concatenation defining `node`. */
DataFlow::Node getOperand(DataFlow::Node node, int n) {
node.getType() instanceof StringType and
exists(DataFlow::BinaryOperationNode add | add = node and add.getOperator() = "+" |
n = 0 and result = add.getLeftOperand()
or
n = 1 and result = add.getRightOperand()
)
}
/** Gets an operand to the string concatenation defining `node`. */
DataFlow::Node getAnOperand(DataFlow::Node node) { result = getOperand(node, _) }
/** Gets the number of operands to the given concatenation. */
int getNumOperand(DataFlow::Node node) { result = strictcount(getAnOperand(node)) }
/** Gets the first operand to the string concatenation defining `node`. */
DataFlow::Node getFirstOperand(DataFlow::Node node) { result = getOperand(node, 0) }
/** Gets the last operand to the string concatenation defining `node`. */
DataFlow::Node getLastOperand(DataFlow::Node node) {
result = getOperand(node, getNumOperand(node) - 1)
}
/**
* Holds if `src` flows to `dst` through the `n`th operand of the given concatenation operator.
*/
predicate taintStep(DataFlow::Node src, DataFlow::Node dst, DataFlow::Node operator, int n) {
src = getOperand(dst, n) and
operator = dst
}
/**
* Holds if there is a taint step from `src` to `dst` through string concatenation.
*/
predicate taintStep(DataFlow::Node src, DataFlow::Node dst) { taintStep(src, dst, _, _) }
/**
* Holds if `node` is the root of a concatenation tree, that is,
* it is a concatenation operator that is not itself the immediate operand to
* another concatenation operator.
*/
predicate isRoot(DataFlow::Node node) {
exists(getAnOperand(node)) and
not node = getAnOperand(_)
}
/**
* Gets the root of the concatenation tree in which `node` is an operand or operator.
*/
DataFlow::Node getRoot(DataFlow::Node node) { isRoot(result) and node = getAnOperand*(result) }
}

View File

@@ -0,0 +1,199 @@
/**
* Provides predicates and classes for working with string operations.
*/
import go
module StringConcatenation {
/** Gets the `n`th operand to the string concatenation defining `node`. */
DataFlow::Node getOperand(DataFlow::Node node, int n) {
node.getType() instanceof StringType and
exists(DataFlow::BinaryOperationNode add | add = node and add.getOperator() = "+" |
n = 0 and result = add.getLeftOperand()
or
n = 1 and result = add.getRightOperand()
)
}
/** Gets an operand to the string concatenation defining `node`. */
DataFlow::Node getAnOperand(DataFlow::Node node) { result = getOperand(node, _) }
/** Gets the number of operands to the given concatenation. */
int getNumOperand(DataFlow::Node node) { result = strictcount(getAnOperand(node)) }
/** Gets the first operand to the string concatenation defining `node`. */
DataFlow::Node getFirstOperand(DataFlow::Node node) { result = getOperand(node, 0) }
/** Gets the last operand to the string concatenation defining `node`. */
DataFlow::Node getLastOperand(DataFlow::Node node) {
result = getOperand(node, getNumOperand(node) - 1)
}
/**
* Holds if `src` flows to `dst` through the `n`th operand of the given concatenation operator.
*/
predicate taintStep(DataFlow::Node src, DataFlow::Node dst, DataFlow::Node operator, int n) {
src = getOperand(dst, n) and
operator = dst
}
/**
* Holds if there is a taint step from `src` to `dst` through string concatenation.
*/
predicate taintStep(DataFlow::Node src, DataFlow::Node dst) { taintStep(src, dst, _, _) }
/**
* Holds if `node` is the root of a concatenation tree, that is,
* it is a concatenation operator that is not itself the immediate operand to
* another concatenation operator.
*/
predicate isRoot(DataFlow::Node node) {
exists(getAnOperand(node)) and
not node = getAnOperand(_)
}
/**
* Gets the root of the concatenation tree in which `node` is an operand or operator.
*/
DataFlow::Node getRoot(DataFlow::Node node) { isRoot(result) and node = getAnOperand*(result) }
}
module StringOps {
/**
* An expression that is equivalent to `strings.HasPrefix(A, B)` or `!strings.HasPrefix(A, B)`.
*
* Extends this class to refine existing API models. If you want to model new APIs,
* extend `StringOps::HasPrefix::Range` instead.
*/
class HasPrefix extends DataFlow::Node {
HasPrefix::Range range;
HasPrefix() { range = this }
/**
* Gets the `A` in `strings.HasPrefix(A, B)`.
*/
DataFlow::Node getBaseString() { result = range.getBaseString() }
/**
* Gets the `B` in `strings.HasPrefix(A, B)`.
*/
DataFlow::Node getSubstring() { result = range.getSubstring() }
/**
* Gets the polarity of the check.
*
* If the polarity is `false` the check returns `true` if the string does not start
* with the given substring.
*/
boolean getPolarity() { result = range.getPolarity() }
}
class StartsWith = HasPrefix;
module HasPrefix {
/**
* An expression that is equivalent to `strings.HasPrefix(A, B)` or `!strings.HasPrefix(A, B)`.
*
* Extends this class to model new APIs. If you want to refine existing API models, extend
* `StringOps::HasPrefix` instead.
*/
abstract class Range extends DataFlow::Node {
/**
* Gets the `A` in `strings.HasPrefix(A, B)`.
*/
abstract DataFlow::Node getBaseString();
/**
* Gets the `B` in `strings.HasPrefix(A, B)`.
*/
abstract DataFlow::Node getSubstring();
/**
* Gets the polarity of the check.
*
* If the polarity is `false` the check returns `true` if the string does not start
* with the given substring.
*/
boolean getPolarity() { result = true }
}
/**
* An expression of form `strings.HasPrefix(A, B)`.
*/
private class StringsHasPrefix extends Range, DataFlow::CallNode {
StringsHasPrefix() { getTarget().hasQualifiedName("strings", "HasPrefix") }
override DataFlow::Node getBaseString() { result = getArgument(0) }
override DataFlow::Node getSubstring() { result = getArgument(1) }
}
/**
* An expression of form `strings.Index(A, B) === 0`.
*/
private class HasPrefix_IndexOfEquals extends Range, DataFlow::EqualityTestNode {
DataFlow::CallNode indexOf;
HasPrefix_IndexOfEquals() {
indexOf.getTarget().hasQualifiedName("strings", "Index") and
getAnOperand() = globalValueNumber(indexOf).getANode() and
getAnOperand().getIntValue() = 0
}
override DataFlow::Node getBaseString() { result = indexOf.getArgument(0) }
override DataFlow::Node getSubstring() { result = indexOf.getArgument(1) }
override boolean getPolarity() { result = expr.getPolarity() }
}
/**
* A comparison of form `x[0] === 'k'` for some rune literal `k`.
*/
private class HasPrefix_FirstCharacter extends Range, DataFlow::EqualityTestNode {
DataFlow::ElementReadNode read;
DataFlow::Node runeLiteral;
HasPrefix_FirstCharacter() {
read.getBase().getType().getUnderlyingType() instanceof StringType and
read.getIndex().getIntValue() = 0 and
eq(_, globalValueNumber(read).getANode(), runeLiteral)
}
override DataFlow::Node getBaseString() { result = read.getBase() }
override DataFlow::Node getSubstring() { result = runeLiteral }
override boolean getPolarity() { result = expr.getPolarity() }
}
/**
* A comparison of form `x[:len(y)] === y`.
*/
private class HasPrefix_Substring extends Range, DataFlow::EqualityTestNode {
DataFlow::SliceNode slice;
DataFlow::Node substring;
HasPrefix_Substring() {
eq(_, slice, substring) and
slice.getLow().getIntValue() = 0 and
(
exists(DataFlow::CallNode len |
len = Builtin::len().getACall() and
len.getArgument(0) = globalValueNumber(substring).getANode() and
slice.getHigh() = globalValueNumber(len).getANode()
)
or
substring.getStringValue().length() = slice.getHigh().getIntValue()
)
}
override DataFlow::Node getBaseString() { result = slice.getBase() }
override DataFlow::Node getSubstring() { result = substring }
override boolean getPolarity() { result = expr.getPolarity() }
}
}
}

View File

@@ -1413,4 +1413,25 @@ module IR {
ExtractTupleElementInstruction extractTupleElement(Instruction base, int idx) {
result.extractsElement(base, idx)
}
/**
* Gets the instruction corresponding to the implicit lower bound of slice `e`, if any.
*/
EvalImplicitLowerSliceBoundInstruction implicitLowerSliceBoundInstruction(SliceExpr e) {
result = MkImplicitLowerSliceBound(e)
}
/**
* Gets the instruction corresponding to the implicit upper bound of slice `e`, if any.
*/
EvalImplicitUpperSliceBoundInstruction implicitUpperSliceBoundInstruction(SliceExpr e) {
result = MkImplicitUpperSliceBound(e)
}
/**
* Gets the instruction corresponding to the implicit maximum bound of slice `e`, if any.
*/
EvalImplicitMaxSliceBoundInstruction implicitMaxSliceBoundInstruction(SliceExpr e) {
result = MkImplicitMaxSliceBound(e)
}
}

View File

@@ -112,9 +112,7 @@ class Node extends TNode {
* Holds if the result of this instruction is known at compile time, and is guaranteed not to
* depend on the platform where it is evaluated.
*/
predicate isPlatformIndependentConstant() {
this.asInstruction().isPlatformIndependentConstant()
}
predicate isPlatformIndependentConstant() { this.asInstruction().isPlatformIndependentConstant() }
/**
* Gets a data-flow node to which data may flow from this node in one (intra-procedural) step.
@@ -449,7 +447,7 @@ class ReadNode extends InstructionNode {
/**
* Holds if this data-flow node evaluates to value of `v`, which is a value entity, that is, a
* constant, variable, field, function, or method.
* constant, variable, field, function, or method.
*/
predicate reads(ValueEntity v) { insn.reads(v) }
@@ -492,6 +490,35 @@ class ElementReadNode extends ReadNode {
predicate reads(Node base, Node index) { getBase() = base and getIndex() = index }
}
/**
* A data-flow node that extracts a substring or slice from a string, array, pointer to array,
* or slice.
*/
class SliceNode extends ExprNode {
override SliceExpr expr;
/** Gets the base of this slice node. */
Node getBase() { result = exprNode(expr.getBase()) }
/** Gets the lower bound of this slice node. */
Node getLow() {
result = exprNode(expr.getLow()) or
result = instructionNode(IR::implicitLowerSliceBoundInstruction(expr))
}
/** Gets the upper bound of this slice node. */
Node getHigh() {
result = exprNode(expr.getHigh()) or
result = instructionNode(IR::implicitUpperSliceBoundInstruction(expr))
}
/** Gets the maximum of this slice node. */
Node getMax() {
result = exprNode(expr.getMax()) or
result = instructionNode(IR::implicitMaxSliceBoundInstruction(expr))
}
}
/**
* A data-flow node corresponding to an expression with a binary operator.
*/
@@ -512,7 +539,7 @@ class BinaryOperationNode extends Node {
|
left = exprNode(assgn.getLhs()) and
right = exprNode(assgn.getRhs()) and
op = o.substring(0, o.length()-1)
op = o.substring(0, o.length() - 1)
)
or
exists(IR::EvalIncDecRhsInstruction rhs, IncDecStmt ids |
@@ -683,9 +710,7 @@ Node extractTupleElement(Node t, int i) {
* Holds if data flows from `nodeFrom` to `nodeTo` in exactly one local
* (intra-procedural) step.
*/
predicate localFlowStep(Node nodeFrom, Node nodeTo) {
simpleLocalFlowStep(nodeFrom, nodeTo)
}
predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFrom, nodeTo) }
/**
* INTERNAL: do not use.

View File

@@ -0,0 +1,6 @@
| main.go:7:9:7:33 | call to HasPrefix | main.go:7:27:7:28 | s1 | main.go:7:31:7:32 | s2 | true |
| main.go:8:3:8:10 | ...==... | main.go:6:23:6:24 | s1 | main.go:6:27:6:28 | s2 | true |
| main.go:9:3:9:14 | ...!=... | main.go:9:3:9:4 | s1 | main.go:9:12:9:14 | 'x' | false |
| main.go:10:3:10:21 | ...==... | main.go:10:3:10:4 | s1 | main.go:10:20:10:21 | s2 | true |
| main.go:11:3:11:20 | ...==... | main.go:11:3:11:4 | s1 | main.go:11:19:11:20 | s2 | true |
| main.go:12:3:12:18 | ...==... | main.go:12:3:12:4 | s1 | main.go:12:14:12:18 | "hi!" | true |

View File

@@ -0,0 +1,4 @@
import go
from StringOps::HasPrefix hp
select hp, hp.getBaseString(), hp.getSubstring(), hp.getPolarity()

View File

@@ -0,0 +1,13 @@
package main
import "strings"
func test(s1, s2 string) bool {
idx := strings.Index(s1, s2)
return strings.HasPrefix(s1, s2) ||
idx == 0 ||
s1[0] != 'x' ||
s1[0:len(s2)] == s2 ||
s1[:len(s2)] == s2 ||
s1[0:3] == "hi!"
}

View File

@@ -0,0 +1,2 @@
| IncompleteUrlSchemeCheck.go:7:22:7:45 | ...==... | This check does not consider data and vbscript. |
| main.go:17:5:17:44 | call to HasPrefix | This check does not consider vbscript. |

View File

@@ -0,0 +1,11 @@
package main
import "net/url"
func sanitizeUrl(urlstr string) string {
u, err := url.Parse(urlstr)
if err != nil || u.Scheme == "javascript" {
return "about:blank"
}
return urlstr
}

View File

@@ -0,0 +1 @@
Security/CWE-020/IncompleteUrlSchemeCheck.ql

View File

@@ -0,0 +1,11 @@
package main
import "net/url"
func sanitizeUrlGod(urlstr string) string {
u, err := url.Parse(urlstr)
if err != nil || u.Scheme == "javascript" || u.Scheme == "data" || u.Scheme == "vbscript" {
return "about:blank"
}
return urlstr
}

View File

@@ -0,0 +1,20 @@
package main
import (
"net/url"
"strings"
)
func test(urlstr string) {
u, _ := url.Parse(urlstr)
sch := u.Scheme
if sch == "javascript" || u.Scheme == "data" || sch == "vbscript" { // OK
return
}
urlstr = strings.NewReplacer("\n", "", "\r", "", "\t", "", "\u0000", "").Replace(urlstr)
urlstr = strings.ToLower(urlstr)
if strings.HasPrefix(urlstr, "javascript:") || strings.HasPrefix(urlstr, "data:") { // NOT OK
return
}
}