From ebaac5f5cb16ec9aba60e2fdc75bba13b08811ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alvaro=20Mu=C3=B1oz?= Date: Wed, 14 Feb 2024 14:03:11 +0100 Subject: [PATCH 1/4] fix: enforce input,output,env prefixes in MaD --- .../codeql/actions/dataflow/ExternalFlow.qll | 26 +++++++++++-------- ql/lib/ext/PLACEHOLDER.model.yml | 7 +++++ .../frabert_replace-string-action.model.yml | 4 +-- ..._actions-find-and-replace-string.model.yml | 4 +-- 4 files changed, 26 insertions(+), 15 deletions(-) create mode 100644 ql/lib/ext/PLACEHOLDER.model.yml diff --git a/ql/lib/codeql/actions/dataflow/ExternalFlow.qll b/ql/lib/codeql/actions/dataflow/ExternalFlow.qll index 38b964110c7..6446fbb5572 100644 --- a/ql/lib/codeql/actions/dataflow/ExternalFlow.qll +++ b/ql/lib/codeql/actions/dataflow/ExternalFlow.qll @@ -50,22 +50,22 @@ predicate externallyDefinedSource(DataFlow::Node source, string sourceType, stri ) and ( if fieldName.trim().matches("env.%") - then source.asExpr() = uses.getEnvExpr(fieldName.trim().replaceAll("env\\.", "")) + then source.asExpr() = uses.getEnvExpr(fieldName.trim().replaceAll("env.", "")) else if fieldName.trim().matches("output.%") - then - // 'output.' is the default qualifier - source.asExpr() = uses + then source.asExpr() = uses else none() ) and sourceType = kind ) } -predicate externallyDefinedSummary(DataFlow::Node pred, DataFlow::Node succ, DataFlow::ContentSet c) { +predicate externallyDefinedStoreStep( + DataFlow::Node pred, DataFlow::Node succ, DataFlow::ContentSet c +) { exists(UsesExpr uses, string action, string version, string input, string output | - c = any(DataFlow::FieldContent ct | ct.getName() = output.replaceAll("output\\.", "")) and summaryModel(action, version, input, output, "taint") and + c = any(DataFlow::FieldContent ct | ct.getName() = output.replaceAll("output.", "")) and uses.getCallee() = action.toLowerCase() and ( if version.trim() = "*" @@ -74,10 +74,11 @@ predicate externallyDefinedSummary(DataFlow::Node pred, DataFlow::Node succ, Dat ) and ( if input.trim().matches("env.%") - then pred.asExpr() = uses.getEnvExpr(input.trim().replaceAll("env\\.", "")) + then pred.asExpr() = uses.getEnvExpr(input.trim().replaceAll("env.", "")) else - // 'input.' is the default qualifier - pred.asExpr() = uses.getArgumentExpr(input.trim().replaceAll("input\\.", "")) + if input.trim().matches("input.%") + then pred.asExpr() = uses.getArgumentExpr(input.trim().replaceAll("input.", "")) + else none() ) and succ.asExpr() = uses ) @@ -87,8 +88,11 @@ predicate externallyDefinedSink(DataFlow::ExprNode sink, string kind) { exists(UsesExpr uses, string action, string version, string input | ( if input.trim().matches("env.%") - then sink.asExpr() = uses.getEnvExpr(input.trim().replaceAll("input\\.", "")) - else sink.asExpr() = uses.getArgumentExpr(input.trim()) + then sink.asExpr() = uses.getEnvExpr(input.trim().replaceAll("env.", "")) + else + if input.trim().matches("input.%") + then sink.asExpr() = uses.getArgumentExpr(input.trim().replaceAll("input.", "")) + else none() ) and sinkModel(action, version, input, kind) and uses.getCallee() = action.toLowerCase() and diff --git a/ql/lib/ext/PLACEHOLDER.model.yml b/ql/lib/ext/PLACEHOLDER.model.yml new file mode 100644 index 00000000000..ef916067967 --- /dev/null +++ b/ql/lib/ext/PLACEHOLDER.model.yml @@ -0,0 +1,7 @@ +extensions: + - addsTo: + pack: codeql/actions-all + extensible: sinkModel + data: + - ["","","",""] + diff --git a/ql/lib/ext/frabert_replace-string-action.model.yml b/ql/lib/ext/frabert_replace-string-action.model.yml index 76ce81b394e..79fd5c76e4a 100644 --- a/ql/lib/ext/frabert_replace-string-action.model.yml +++ b/ql/lib/ext/frabert_replace-string-action.model.yml @@ -3,5 +3,5 @@ extensions: pack: codeql/actions-all extensible: summaryModel data: - - ["frabert/replace-string-action", "*", "string", "replaced", "taint"] - - ["frabert/replace-string-action", "*", "replace-with", "replaced", "taint"] + - ["frabert/replace-string-action", "*", "input.string", "output.replaced", "taint"] + - ["frabert/replace-string-action", "*", "input.replace-with", "output.replaced", "taint"] diff --git a/ql/lib/ext/mad9000_actions-find-and-replace-string.model.yml b/ql/lib/ext/mad9000_actions-find-and-replace-string.model.yml index 46a577d2f7e..332527813a4 100644 --- a/ql/lib/ext/mad9000_actions-find-and-replace-string.model.yml +++ b/ql/lib/ext/mad9000_actions-find-and-replace-string.model.yml @@ -3,5 +3,5 @@ extensions: pack: codeql/actions-all extensible: summaryModel data: - - ["mad9000/actions-find-and-replace-string", "*", "source", "value", "taint"] - - ["mad9000/actions-find-and-replace-string", "*", "replace", "value", "taint"] \ No newline at end of file + - ["mad9000/actions-find-and-replace-string", "*", "input.source", "output.value", "taint"] + - ["mad9000/actions-find-and-replace-string", "*", "input.replace", "output.value", "taint"] From 494fb2470e1c399699b4dab1176ac573f9947ac2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alvaro=20Mu=C3=B1oz?= Date: Wed, 14 Feb 2024 14:05:13 +0100 Subject: [PATCH 2/4] fix: refactor local, read and store steps --- ql/lib/codeql/actions/dataflow/FlowSteps.qll | 2 +- .../dataflow/internal/DataFlowPrivate.qll | 74 ++++++------------- 2 files changed, 22 insertions(+), 54 deletions(-) diff --git a/ql/lib/codeql/actions/dataflow/FlowSteps.qll b/ql/lib/codeql/actions/dataflow/FlowSteps.qll index 9def461900e..faa7c4c3ebe 100644 --- a/ql/lib/codeql/actions/dataflow/FlowSteps.qll +++ b/ql/lib/codeql/actions/dataflow/FlowSteps.qll @@ -40,7 +40,7 @@ class AdditionalTaintStep extends Unit { * echo "foo=$(echo $TAINTED)" >> $GITHUB_OUTPUT * echo "test=${{steps.step1.outputs.MSG}}" >> "$GITHUB_OUTPUT" */ -predicate runEnvToScriptstep(DataFlow::Node pred, DataFlow::Node succ, DataFlow::ContentSet c) { +predicate runEnvToScriptStoreStep(DataFlow::Node pred, DataFlow::Node succ, DataFlow::ContentSet c) { exists(RunExpr r, string varName, string output | c = any(DataFlow::FieldContent ct | ct.getName() = output.replaceAll("output\\.", "")) and r.getEnvExpr(varName) = pred.asExpr() and diff --git a/ql/lib/codeql/actions/dataflow/internal/DataFlowPrivate.qll b/ql/lib/codeql/actions/dataflow/internal/DataFlowPrivate.qll index 55fda038789..045910ed676 100644 --- a/ql/lib/codeql/actions/dataflow/internal/DataFlowPrivate.qll +++ b/ql/lib/codeql/actions/dataflow/internal/DataFlowPrivate.qll @@ -133,9 +133,9 @@ predicate typeStrongerThan(DataFlowType t1, DataFlowType t2) { none() } newtype TContent = TFieldContent(string name) { + // We only use field flow for steps and jobs outputs, not for accessing other context fields such as jobs, env or inputs name = any(StepsCtxAccessExpr a).getFieldName() or - name = any(NeedsCtxAccessExpr a).getFieldName() or - name = any(JobsCtxAccessExpr a).getFieldName() + name = any(NeedsCtxAccessExpr a).getFieldName() } /** @@ -188,11 +188,12 @@ class ArgumentPosition extends string { predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) { ppos = apos } /** - * Holds if there is a local flow step between a ${{}} expression accesing a step output variable and the step output itself - * But only for those cases where the step output is defined externally in a MaD specification. - * The reason for this is that we don't currently have a way to specify that a source starts with a non-empty access - * path so the easiest thing is to add the corresponding read steps of that field as local flow steps as well. - * e.g. ${{ steps.step1.output.foo }} + * Holds if there is a local flow step between a ${{ steps.xxx.outputs.yyy }} expression accesing a step output field + * and the step output itself. But only for those cases where the step output is defined externally in a MaD Source + * specification. The reason for this is that we don't currently have a way to specify that a source starts with a + * non-empty access path so we cannot write a Source that stores the taint in a Content, we can only do that for steps + * (storeStep). The easiest thing is to add this local flow step that simulates a read step from the source node for a specific + * field name. */ predicate stepsCtxLocalStep(Node nodeFrom, Node nodeTo) { exists(StepStmt astFrom, StepsCtxAccessExpr astTo | @@ -204,19 +205,6 @@ predicate stepsCtxLocalStep(Node nodeFrom, Node nodeTo) { ) } -/** - * Holds if there is a local flow step between a ${{}} expression accesing a job output variable and the job output itself - * e.g. ${{ needs.job1.output.foo }} or ${{ jobs.job1.output.foo }} - */ -predicate jobsCtxLocalStep(Node nodeFrom, Node nodeTo) { - exists(Expression astFrom, CtxAccessExpr astTo | - astFrom = nodeFrom.asExpr() and - astTo = nodeTo.asExpr() and - astTo.getRefExpr() = astFrom and - (astTo instanceof NeedsCtxAccessExpr or astTo instanceof JobsCtxAccessExpr) - ) -} - /** * Holds if there is a local flow step between a ${{}} expression accesing an input variable and the input itself * e.g. ${{ inputs.foo }} @@ -252,7 +240,6 @@ predicate envCtxLocalStep(Node nodeFrom, Node nodeTo) { pragma[nomagic] predicate localFlowStep(Node nodeFrom, Node nodeTo) { stepsCtxLocalStep(nodeFrom, nodeTo) or - jobsCtxLocalStep(nodeFrom, nodeTo) or inputsCtxLocalStep(nodeFrom, nodeTo) or envCtxLocalStep(nodeFrom, nodeTo) } @@ -272,17 +259,12 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) { localFlowStep(nodeFr */ predicate jumpStep(Node nodeFrom, Node nodeTo) { none() } -/** - * A read step to read the value of a ReusableWork uses step and connect it to its - * corresponding JobOutputAccessExpr - */ -predicate reusableWorkflowReturnReadStep(Node node1, Node node2, ContentSet c) { - exists(NeedsCtxAccessExpr expr, string fieldName | - expr.usesReusableWorkflow() and - expr.getRefExpr() = node1.asExpr() and - expr.getFieldName() = fieldName and - expr = node2.asExpr() and - c = any(FieldContent ct | ct.getName() = fieldName) +predicate ctxFieldReadStep(Node node1, Node node2, ContentSet c) { + exists(CtxAccessExpr access | + (access instanceof NeedsCtxAccessExpr or access instanceof StepsCtxAccessExpr) and + c = any(FieldContent ct | ct.getName() = access.getFieldName()) and + node1.asExpr() = access.getRefExpr() and + node2.asExpr() = access ) } @@ -291,24 +273,14 @@ predicate reusableWorkflowReturnReadStep(Node node1, Node node2, ContentSet c) { * `node1` references an object with a content `c.getAReadContent()` whose * value ends up in `node2`. */ -predicate readStep(Node node1, ContentSet c, Node node2) { - // TODO: Extract to its own predicate - exists(StepsCtxAccessExpr access | - c = any(FieldContent ct | ct.getName() = access.getFieldName()) and - node1.asExpr() = access.getRefExpr() and - node2.asExpr() = access - ) - or - reusableWorkflowReturnReadStep(node1, node2, c) -} +predicate readStep(Node node1, ContentSet c, Node node2) { ctxFieldReadStep(node1, node2, c) } /** - * A store step to store the value of a ReusableWorkflowStmt output expr into the return node (node2) + * A store step to store an output expression (node1) into its OutputsStm node (node2) * with a given access path (fieldName) */ -predicate reusableWorkflowReturnStoreStep(Node node1, Node node2, ContentSet c) { - exists(ReusableWorkflowStmt stmt, OutputsStmt out, string fieldName | - out = stmt.getOutputsStmt() and +predicate fieldStoreStep(Node node1, Node node2, ContentSet c) { + exists(OutputsStmt out, string fieldName | node1.asExpr() = out.getOutputExpr(fieldName) and node2.asExpr() = out and c = any(FieldContent ct | ct.getName() = fieldName) @@ -321,13 +293,9 @@ predicate reusableWorkflowReturnStoreStep(Node node1, Node node2, ContentSet c) * contains the value of `node1`. */ predicate storeStep(Node node1, ContentSet c, Node node2) { - reusableWorkflowReturnStoreStep(node1, node2, c) - or - // TODO: rename to xxxxStoreStep - externallyDefinedSummary(node1, node2, c) - or - // TODO: rename to xxxxStoreStep - runEnvToScriptstep(node1, node2, c) + fieldStoreStep(node1, node2, c) or + externallyDefinedStoreStep(node1, node2, c) or + runEnvToScriptStoreStep(node1, node2, c) } /** From 90d1ae4a05208f1b6a7c1b16f860e72b232288c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alvaro=20Mu=C3=B1oz?= Date: Wed, 14 Feb 2024 14:06:28 +0100 Subject: [PATCH 3/4] fix: simplify Ast --- ql/lib/codeql/actions/Ast.qll | 30 +++---------------- .../actions/controlflow/internal/Cfg.qll | 6 +--- 2 files changed, 5 insertions(+), 31 deletions(-) diff --git a/ql/lib/codeql/actions/Ast.qll b/ql/lib/codeql/actions/Ast.qll index 087b7f19e62..e5f9e35a4a9 100644 --- a/ql/lib/codeql/actions/Ast.qll +++ b/ql/lib/codeql/actions/Ast.qll @@ -149,7 +149,7 @@ class JobStmt extends Statement instanceof Actions::Job { * out1: ${steps.foo.bar} * out2: ${steps.foo.baz} */ - JobOutputStmt getOutputStmt() { result = this.(Actions::Job).lookup("outputs") } + OutputsStmt getOutputsStmt() { result = this.(Actions::Job).lookup("outputs") } /** * Reusable workflow jobs may have Uses children @@ -166,28 +166,6 @@ class JobStmt extends Statement instanceof Actions::Job { } } -/** - * Declaration of the outputs for the job. - * eg: - * out1: ${steps.foo.bar} - * out2: ${steps.foo.baz} - */ -class JobOutputStmt extends Statement instanceof YamlMapping { - JobStmt job; - - JobOutputStmt() { job.(YamlMapping).lookup("outputs") = this } - - YamlMapping asYamlMapping() { result = this } - - /** - * Gets a specific value expression - * eg: ${steps.foo.bar} - */ - Expression getOutputExpr(string id) { - this.(YamlMapping).maps(any(YamlScalar s | s.getValue() = id), result) - } -} - /** * A Step is a single task that can be executed as part of a job. */ @@ -435,9 +413,9 @@ class NeedsCtxAccessExpr extends CtxAccessExpr { job.getLocation().getFile() = this.getLocation().getFile() and ( // regular jobs - job.getOutputStmt().getOutputExpr(fieldName) = result + job.getOutputsStmt() = result or - // jobs calling reusable workflows + // reusable workflow calling jobs job.getUsesExpr() = result ) } @@ -464,7 +442,7 @@ class JobsCtxAccessExpr extends CtxAccessExpr { exists(JobStmt job | job.getId() = jobId and job.getLocation().getFile() = this.getLocation().getFile() and - job.getOutputStmt().getOutputExpr(fieldName) = result + job.getOutputsStmt() = result ) } } diff --git a/ql/lib/codeql/actions/controlflow/internal/Cfg.qll b/ql/lib/codeql/actions/controlflow/internal/Cfg.qll index 8d044c827a2..8808fb0afe5 100644 --- a/ql/lib/codeql/actions/controlflow/internal/Cfg.qll +++ b/ql/lib/codeql/actions/controlflow/internal/Cfg.qll @@ -231,7 +231,7 @@ private class JobTree extends StandardPreOrderTree instanceof JobStmt { rank[i](Expression child, Location l | ( child = super.getAStepStmt() or - child = super.getOutputStmt() or + child = super.getOutputsStmt() or child = super.getUsesExpr() ) and l = child.getLocation() @@ -243,10 +243,6 @@ private class JobTree extends StandardPreOrderTree instanceof JobStmt { } } -private class JobOutputTree extends StandardPreOrderTree instanceof JobOutputStmt { - override ControlFlowTree getChildNode(int i) { result = super.asYamlMapping().getValueNode(i) } -} - private class StepUsesTree extends StandardPreOrderTree instanceof StepUsesExpr { override ControlFlowTree getChildNode(int i) { result = From f65587e5cfa8b9d00dcb91d98df4a720bcc384a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alvaro=20Mu=C3=B1oz?= Date: Wed, 14 Feb 2024 17:08:13 +0100 Subject: [PATCH 4/4] feat(fieldflow): Refactor flow through Job outputs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Job output should flow to the “key” (YamlString) and be read from there from the JobOutputAccessExpr. - NeedsCtxAccessExpr.getRefExpr should point to the UsesExpr(RW calling Job) or to the OutputsStmt(Regular Job). - JobsCtxAccessExpr.getRefExpr should point to the OutputsStmt(Regular Job). - Create storeStep from OutputExpr to OutputStmt using output var name as the field name. - Create a readStep for CtxAccessExpr to read the referenced fields from the job outputs. --- .../dataflow/internal/DataFlowPrivate.qll | 23 +++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/ql/lib/codeql/actions/dataflow/internal/DataFlowPrivate.qll b/ql/lib/codeql/actions/dataflow/internal/DataFlowPrivate.qll index 045910ed676..12be2d89998 100644 --- a/ql/lib/codeql/actions/dataflow/internal/DataFlowPrivate.qll +++ b/ql/lib/codeql/actions/dataflow/internal/DataFlowPrivate.qll @@ -133,9 +133,10 @@ predicate typeStrongerThan(DataFlowType t1, DataFlowType t2) { none() } newtype TContent = TFieldContent(string name) { - // We only use field flow for steps and jobs outputs, not for accessing other context fields such as jobs, env or inputs + // We only use field flow for steps and jobs outputs, not for accessing other context fields such as env or inputs name = any(StepsCtxAccessExpr a).getFieldName() or - name = any(NeedsCtxAccessExpr a).getFieldName() + name = any(NeedsCtxAccessExpr a).getFieldName() or + name = any(JobsCtxAccessExpr a).getFieldName() } /** @@ -196,9 +197,8 @@ predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) { ppos = * field name. */ predicate stepsCtxLocalStep(Node nodeFrom, Node nodeTo) { - exists(StepStmt astFrom, StepsCtxAccessExpr astTo | + exists(UsesExpr astFrom, StepsCtxAccessExpr astTo | externallyDefinedSource(nodeFrom, _, "output." + astTo.getFieldName()) and - astFrom instanceof UsesExpr and astFrom = nodeFrom.asExpr() and astTo = nodeTo.asExpr() and astTo.getRefExpr() = astFrom @@ -259,9 +259,16 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) { localFlowStep(nodeFr */ predicate jumpStep(Node nodeFrom, Node nodeTo) { none() } +/** + * Holds if a CtxAccessExpr reads a field from a job (needs/jobs), step (steps) output via a read of `c` (fieldname) + */ predicate ctxFieldReadStep(Node node1, Node node2, ContentSet c) { exists(CtxAccessExpr access | - (access instanceof NeedsCtxAccessExpr or access instanceof StepsCtxAccessExpr) and + ( + access instanceof NeedsCtxAccessExpr or + access instanceof StepsCtxAccessExpr or + access instanceof JobsCtxAccessExpr + ) and c = any(FieldContent ct | ct.getName() = access.getFieldName()) and node1.asExpr() = access.getRefExpr() and node2.asExpr() = access @@ -272,12 +279,13 @@ predicate ctxFieldReadStep(Node node1, Node node2, ContentSet c) { * Holds if data can flow from `node1` to `node2` via a read of `c`. Thus, * `node1` references an object with a content `c.getAReadContent()` whose * value ends up in `node2`. + * Store steps without corresponding reads are pruned aggressively very early, since they can never contribute to a complete path. */ predicate readStep(Node node1, ContentSet c, Node node2) { ctxFieldReadStep(node1, node2, c) } /** - * A store step to store an output expression (node1) into its OutputsStm node (node2) - * with a given access path (fieldName) + * Stores an output expression (node1) into its OutputsStm node (node2) + * using the output variable name as the access path */ predicate fieldStoreStep(Node node1, Node node2, ContentSet c) { exists(OutputsStmt out, string fieldName | @@ -291,6 +299,7 @@ predicate fieldStoreStep(Node node1, Node node2, ContentSet c) { * Holds if data can flow from `node1` to `node2` via a store into `c`. Thus, * `node2` references an object with a content `c.getAStoreContent()` that * contains the value of `node1`. + * Store steps without corresponding reads are pruned aggressively very early, since they can never contribute to a complete path. */ predicate storeStep(Node node1, ContentSet c, Node node2) { fieldStoreStep(node1, node2, c) or