From 3a7910da5a71e4dd6d7e59808582840c8e4bc25f Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Thu, 5 Mar 2020 14:55:40 +0000 Subject: [PATCH 1/5] Introduce (un-)marshaling functions as a concept and instantiate it with the functions in `encoding/json`. --- ql/src/semmle/go/Concepts.qll | 80 +++++++++++++++++++ ql/src/semmle/go/frameworks/Stdlib.qll | 25 +++++- .../go/security/StringBreakCustomizations.qll | 9 +-- 3 files changed, 106 insertions(+), 8 deletions(-) diff --git a/ql/src/semmle/go/Concepts.qll b/ql/src/semmle/go/Concepts.qll index 03950b66654..69306a1a724 100644 --- a/ql/src/semmle/go/Concepts.qll +++ b/ql/src/semmle/go/Concepts.qll @@ -589,3 +589,83 @@ module LoggerCall { abstract DataFlow::Node getAMessageComponent(); } } + +/** + * A function that encodes data into a binary or textual format. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `MarshalingFunction::Range` instead. + */ +class MarshalingFunction extends Function { + MarshalingFunction::Range self; + + MarshalingFunction() { this = self } + + /** Gets an input that is encoded by this function. */ + DataFlow::FunctionInput getAnInput() { result = self.getAnInput() } + + /** Gets the output that contains the encoded data produced by this function. */ + DataFlow::FunctionOutput getOutput() { result = self.getOutput() } + + /** Gets an identifier for the format this function encodes into, such as "JSON". */ + string getFormat() { result = self.getFormat() } +} + +module MarshalingFunction { + /** + * A function that encodes data into a binary or textual format. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `MarshalingFunction` instead. + */ + abstract class Range extends Function { + /** Gets an input that is encoded by this function. */ + abstract DataFlow::FunctionInput getAnInput(); + + /** Gets the output that contains the encoded data produced by this function. */ + abstract DataFlow::FunctionOutput getOutput(); + + /** Gets an identifier for the format this function encodes into, such as "JSON". */ + abstract string getFormat(); + } +} + +/** + * A function that decodes data from a binary or textual format. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `UnmarshalingFunction::Range` instead. + */ +class UnmarshalingFunction extends Function { + UnmarshalingFunction::Range self; + + UnmarshalingFunction() { this = self } + + /** Gets an input that is decoded by this function. */ + DataFlow::FunctionInput getAnInput() { result = self.getAnInput() } + + /** Gets the output that contains the decoded data produced by this function. */ + DataFlow::FunctionOutput getOutput() { result = self.getOutput() } + + /** Gets an identifier for the format this function decodes from, such as "JSON". */ + string getFormat() { result = self.getFormat() } +} + +module UnmarshalingFunction { + /** + * A function that decodes data from a binary or textual format. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `UnmarshalingFunction` instead. + */ + abstract class Range extends Function { + /** Gets an input that is decoded by this function. */ + abstract DataFlow::FunctionInput getAnInput(); + + /** Gets the output that contains the decoded data produced by this function. */ + abstract DataFlow::FunctionOutput getOutput(); + + /** Gets an identifier for the format this function decodes from, such as "JSON". */ + abstract string getFormat(); + } +} diff --git a/ql/src/semmle/go/frameworks/Stdlib.qll b/ql/src/semmle/go/frameworks/Stdlib.qll index 63d66ad5778..48b4d43669f 100644 --- a/ql/src/semmle/go/frameworks/Stdlib.qll +++ b/ql/src/semmle/go/frameworks/Stdlib.qll @@ -519,16 +519,35 @@ module Log { /** Provides models of some functions in the `encoding/json` package. */ module EncodingJson { - private class MarshalFunction extends TaintTracking::FunctionModel { + private class MarshalFunction extends TaintTracking::FunctionModel, MarshalingFunction::Range { MarshalFunction() { this.hasQualifiedName("encoding/json", "Marshal") or this.hasQualifiedName("encoding/json", "MarshalIndent") } override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { - inp.isParameter(0) and - outp.isResult(0) + inp = getAnInput() and outp = getOutput() } + + override DataFlow::FunctionInput getAnInput() { result.isParameter(0) } + + override DataFlow::FunctionOutput getOutput() { result.isResult(0) } + + override string getFormat() { result = "JSON" } + } + + private class UnmarshalFunction extends TaintTracking::FunctionModel, UnmarshalingFunction::Range { + UnmarshalFunction() { this.hasQualifiedName("encoding/json", "Unmarshal") } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp = getAnInput() and outp = getOutput() + } + + override DataFlow::FunctionInput getAnInput() { result.isParameter(0) } + + override DataFlow::FunctionOutput getOutput() { result.isParameter(1) } + + override string getFormat() { result = "JSON" } } } diff --git a/ql/src/semmle/go/security/StringBreakCustomizations.qll b/ql/src/semmle/go/security/StringBreakCustomizations.qll index 0e58ee15efd..b2330f59d18 100644 --- a/ql/src/semmle/go/security/StringBreakCustomizations.qll +++ b/ql/src/semmle/go/security/StringBreakCustomizations.qll @@ -48,9 +48,8 @@ module StringBreak { /** A call to `json.Marshal`, considered as a taint source for unsafe quoting. */ class JsonMarshalAsSource extends Source { JsonMarshalAsSource() { - exists(Function jsonMarshal | jsonMarshal.hasQualifiedName("encoding/json", "Marshal") | - // we are only interested in the first result (the second result is an error) - this = DataFlow::extractTupleElement(jsonMarshal.getACall(), 0) + exists(MarshalingFunction jsonMarshal | jsonMarshal.getFormat() = "JSON" | + this = jsonMarshal.getOutput().getNode(jsonMarshal.getACall()) ) } } @@ -72,8 +71,8 @@ module StringBreak { /** A call to `json.Unmarshal`, considered as a sanitizer for unsafe quoting. */ class UnmarshalSanitizer extends Sanitizer { UnmarshalSanitizer() { - exists(Function jsonUnmarshal | jsonUnmarshal.hasQualifiedName("encoding/json", "Unmarshal") | - this = jsonUnmarshal.getACall() + exists(UnmarshalingFunction jsonUnmarshal | jsonUnmarshal.getFormat() = "JSON" | + this = jsonUnmarshal.getOutput().getNode(jsonUnmarshal.getACall()) ) } } From f599243a348ab911aadbe78d811a67da1b892640 Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Fri, 6 Mar 2020 14:18:35 +0000 Subject: [PATCH 2/5] Conflate references and referents more thoroughly in taint tracking. --- .../go/dataflow/internal/DataFlowUtil.qll | 4 +++ .../dataflow/internal/TaintTrackingUtil.qll | 27 ++++++++++++++++--- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/ql/src/semmle/go/dataflow/internal/DataFlowUtil.qll b/ql/src/semmle/go/dataflow/internal/DataFlowUtil.qll index 310dd496518..439e1c0d40d 100644 --- a/ql/src/semmle/go/dataflow/internal/DataFlowUtil.qll +++ b/ql/src/semmle/go/dataflow/internal/DataFlowUtil.qll @@ -392,6 +392,10 @@ class PostUpdateNode extends Node { ( preupd instanceof AddressOperationNode or + preupd = any(AddressOperationNode addr).getOperand() + or + preupd = any(PointerDereferenceNode deref).getOperand() + or exists(Write w, DataFlow::Node base | w.writesField(base, _, _) | preupd = base or diff --git a/ql/src/semmle/go/dataflow/internal/TaintTrackingUtil.qll b/ql/src/semmle/go/dataflow/internal/TaintTrackingUtil.qll index 23ec8757d10..772294a6f11 100644 --- a/ql/src/semmle/go/dataflow/internal/TaintTrackingUtil.qll +++ b/ql/src/semmle/go/dataflow/internal/TaintTrackingUtil.qll @@ -59,11 +59,32 @@ predicate localAdditionalTaintStep(DataFlow::Node pred, DataFlow::Node succ) { any(AdditionalTaintStep a).step(pred, succ) } -/** Holds if taint flows from `pred` to `succ` via a reference or dereference. */ +/** + * Holds if taint flows from `pred` to `succ` via a reference or dereference. + * + * The taint-tracking library does not distinguish between a reference and its referent, + * treating one as tainted if the other is. + */ predicate referenceStep(DataFlow::Node pred, DataFlow::Node succ) { - succ.(DataFlow::AddressOperationNode).getOperand() = pred + exists(DataFlow::AddressOperationNode addr | + // from `x` to `&x` + pred = addr.getOperand() and + succ = addr + or + // from `&x` to `x` + pred = addr and + succ.(DataFlow::PostUpdateNode).getPreUpdateNode() = addr.getOperand() + ) or - succ.(DataFlow::PointerDereferenceNode).getOperand() = pred + exists(DataFlow::PointerDereferenceNode deref | + // from `x` to `*x` + pred = deref.getOperand() and + succ = deref + or + // from `*x` to `x` + pred = deref and + succ.(DataFlow::PostUpdateNode).getPreUpdateNode() = deref.getOperand() + ) } /** Holds if taint flows from `pred` to `succ` via a field read. */ From bf6865b96ad782e03a79e43489e6a597025ab16c Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Fri, 6 Mar 2020 14:18:59 +0000 Subject: [PATCH 3/5] Add model of ioutil.ReadAll --- ql/src/semmle/go/frameworks/Stdlib.qll | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/ql/src/semmle/go/frameworks/Stdlib.qll b/ql/src/semmle/go/frameworks/Stdlib.qll index 48b4d43669f..07f2fde62b6 100644 --- a/ql/src/semmle/go/frameworks/Stdlib.qll +++ b/ql/src/semmle/go/frameworks/Stdlib.qll @@ -108,6 +108,20 @@ module IoUtil { override DataFlow::Node getAPathArgument() { result = getArgument(0) } } + + /** + * A taint model of the `ioutil.ReadAll` function, recording that it propagates taint + * from its first argument to its first result. + */ + private class ReadAll extends TaintTracking::FunctionModel { + ReadAll() { + hasQualifiedName("io/ioutil", "ReadAll") + } + + override predicate hasTaintFlow(FunctionInput inp, FunctionOutput outp) { + inp.isParameter(0) and outp.isResult(0) + } + } } /** Provides models of commonly used functions in the `os` package. */ From bcb9ce2498d52214ca4fde9bab8a6b415c350eff Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Fri, 6 Mar 2020 14:19:10 +0000 Subject: [PATCH 4/5] Add another test for StringBreak. --- ql/test/query-tests/Security/CWE-089/tst.go | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/ql/test/query-tests/Security/CWE-089/tst.go b/ql/test/query-tests/Security/CWE-089/tst.go index af0d7e6cf79..403d29a53e4 100644 --- a/ql/test/query-tests/Security/CWE-089/tst.go +++ b/ql/test/query-tests/Security/CWE-089/tst.go @@ -10,5 +10,19 @@ func marshal(version interface{}) string { if err != nil { return fmt.Sprintf("error: '%v'", err) // OK } - return versionJSON + return string(versionJSON) +} + +type StringWrapper struct { + s string +} + +func marshalUnmarshal(w1 StringWrapper) (string, error) { + buf, err := json.Marshal(w1) + if err != nil { + return "", err + } + var w2 StringWrapper + json.Unmarshal(buf, &w2) + return fmt.Sprintf("wrapped string: '%s'", w2.s), nil // OK } From 1be0cc57a808dd40a5522df6476e1371f7463be4 Mon Sep 17 00:00:00 2001 From: Max Schaefer Date: Fri, 6 Mar 2020 14:24:51 +0000 Subject: [PATCH 5/5] Add test case from https://github.com/github/codeql-go/issues/48. --- .../Security/CWE-089/SqlInjection.expected | 14 ++++++ .../query-tests/Security/CWE-089/issue48.go | 47 +++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 ql/test/query-tests/Security/CWE-089/issue48.go diff --git a/ql/test/query-tests/Security/CWE-089/SqlInjection.expected b/ql/test/query-tests/Security/CWE-089/SqlInjection.expected index 60a68b5f652..86e127c29eb 100644 --- a/ql/test/query-tests/Security/CWE-089/SqlInjection.expected +++ b/ql/test/query-tests/Security/CWE-089/SqlInjection.expected @@ -1,5 +1,9 @@ edges | SqlInjection.go:11:3:11:9 | selection of URL : pointer type | SqlInjection.go:12:11:12:11 | q | +| issue48.go:22:25:22:32 | selection of Body : ReadCloser | issue48.go:27:11:27:12 | q3 | +| issue48.go:32:26:32:33 | selection of Body : ReadCloser | issue48.go:37:11:37:12 | q4 | +| issue48.go:42:17:42:50 | type conversion : slice type | issue48.go:46:11:46:12 | q5 | +| issue48.go:42:24:42:30 | selection of URL : pointer type | issue48.go:42:17:42:50 | type conversion : slice type | | main.go:10:11:10:16 | selection of Form : Values | main.go:10:11:10:28 | index expression | | main.go:14:63:14:67 | selection of URL : pointer type | main.go:14:11:14:84 | call to Sprintf | | main.go:15:63:15:84 | call to Get : string | main.go:15:11:15:85 | call to Sprintf | @@ -40,6 +44,13 @@ edges nodes | SqlInjection.go:11:3:11:9 | selection of URL : pointer type | semmle.label | selection of URL : pointer type | | SqlInjection.go:12:11:12:11 | q | semmle.label | q | +| issue48.go:22:25:22:32 | selection of Body : ReadCloser | semmle.label | selection of Body : ReadCloser | +| issue48.go:27:11:27:12 | q3 | semmle.label | q3 | +| issue48.go:32:26:32:33 | selection of Body : ReadCloser | semmle.label | selection of Body : ReadCloser | +| issue48.go:37:11:37:12 | q4 | semmle.label | q4 | +| issue48.go:42:17:42:50 | type conversion : slice type | semmle.label | type conversion : slice type | +| issue48.go:42:24:42:30 | selection of URL : pointer type | semmle.label | selection of URL : pointer type | +| issue48.go:46:11:46:12 | q5 | semmle.label | q5 | | main.go:10:11:10:16 | selection of Form : Values | semmle.label | selection of Form : Values | | main.go:10:11:10:28 | index expression | semmle.label | index expression | | main.go:14:11:14:84 | call to Sprintf | semmle.label | call to Sprintf | @@ -83,6 +94,9 @@ nodes | main.go:61:11:61:11 | q | semmle.label | q | #select | SqlInjection.go:12:11:12:11 | q | SqlInjection.go:11:3:11:9 | selection of URL : pointer type | SqlInjection.go:12:11:12:11 | q | This query depends on $@. | SqlInjection.go:11:3:11:9 | selection of URL | a user-provided value | +| issue48.go:27:11:27:12 | q3 | issue48.go:22:25:22:32 | selection of Body : ReadCloser | issue48.go:27:11:27:12 | q3 | This query depends on $@. | issue48.go:22:25:22:32 | selection of Body | a user-provided value | +| issue48.go:37:11:37:12 | q4 | issue48.go:32:26:32:33 | selection of Body : ReadCloser | issue48.go:37:11:37:12 | q4 | This query depends on $@. | issue48.go:32:26:32:33 | selection of Body | a user-provided value | +| issue48.go:46:11:46:12 | q5 | issue48.go:42:24:42:30 | selection of URL : pointer type | issue48.go:46:11:46:12 | q5 | This query depends on $@. | issue48.go:42:24:42:30 | selection of URL | a user-provided value | | main.go:10:11:10:28 | index expression | main.go:10:11:10:16 | selection of Form : Values | main.go:10:11:10:28 | index expression | This query depends on $@. | main.go:10:11:10:16 | selection of Form | a user-provided value | | main.go:14:11:14:84 | call to Sprintf | main.go:14:63:14:67 | selection of URL : pointer type | main.go:14:11:14:84 | call to Sprintf | This query depends on $@. | main.go:14:63:14:67 | selection of URL | a user-provided value | | main.go:15:11:15:85 | call to Sprintf | main.go:15:63:15:84 | call to Get : string | main.go:15:11:15:85 | call to Sprintf | This query depends on $@. | main.go:15:63:15:84 | call to Get | a user-provided value | diff --git a/ql/test/query-tests/Security/CWE-089/issue48.go b/ql/test/query-tests/Security/CWE-089/issue48.go new file mode 100644 index 00000000000..a3f7ca42c05 --- /dev/null +++ b/ql/test/query-tests/Security/CWE-089/issue48.go @@ -0,0 +1,47 @@ +package main + +// see https://github.com/github/codeql-go/issues/48 + +import ( + "database/sql" + "encoding/json" + "fmt" + "io/ioutil" + "net/http" +) + +type RequestStruct struct { + Id int64 `db:"id"` + Category []string `db:"category"` +} + +func handler(db *sql.DB, req *http.Request) { + // read data from request body and unmarshal to a indeterminacy struct + // POST: {"a": "b", "category": "test"} + var RequestDataFromJson map[string]interface{} + b, _ := ioutil.ReadAll(req.Body) + json.Unmarshal(b, &RequestDataFromJson) + + q3 := fmt.Sprintf("SELECT ITEM,PRICE FROM PRODUCT WHERE ITEM_CATEGORY='%s' ORDER BY PRICE", + RequestDataFromJson["category"]) + db.Query(q3) // NOT OK + + // read data from request body and unmarshal to a determined struct + // POST: {"id": "1", "category": "test"} + var RequestDataFromJson2 RequestStruct + b2, _ := ioutil.ReadAll(req.Body) + json.Unmarshal(b2, &RequestDataFromJson2) + + q4 := fmt.Sprintf("SELECT ITEM,PRICE FROM PRODUCT WHERE ITEM_CATEGORY='%s' ORDER BY PRICE", + RequestDataFromJson2.Category) + db.Query(q4) // NOT OK + + // read json data from a url parameter + // GET: ?json={"id": 1, "category": "test"} + var RequestDataFromJson3 RequestStruct + json.Unmarshal([]byte(req.URL.Query()["json"][0]), &RequestDataFromJson3) + + q5 := fmt.Sprintf("SELECT ITEM,PRICE FROM PRODUCT WHERE ITEM_CATEGORY='%s' ORDER BY PRICE", + RequestDataFromJson3.Category) + db.Query(q5) // NOT OK +}