diff --git a/ql/lib/codeql/actions/config/Config.qll b/ql/lib/codeql/actions/config/Config.qll index e298865c468..e3bf239565e 100644 --- a/ql/lib/codeql/actions/config/Config.qll +++ b/ql/lib/codeql/actions/config/Config.qll @@ -128,3 +128,13 @@ predicate vulnerableActionsDataModel( ) { Extensions::vulnerableActionsDataModel(action, vulnerable_version, vulnerable_sha, fixed_version) } + +/** + * MaD models for untrusted git commands + * Fields: + * - cmd_regex: Regular expression for matching untrusted git commands + * - flag: Flag for the command + */ +predicate untrustedGitCommandsDataModel(string cmd_regex, string flag) { + Extensions::untrustedGitCommandsDataModel(cmd_regex, flag) +} diff --git a/ql/lib/codeql/actions/config/ConfigExtensions.qll b/ql/lib/codeql/actions/config/ConfigExtensions.qll index cc1b5553f5f..a32e9c445f2 100644 --- a/ql/lib/codeql/actions/config/ConfigExtensions.qll +++ b/ql/lib/codeql/actions/config/ConfigExtensions.qll @@ -57,3 +57,8 @@ extensible predicate argumentInjectionSinksDataModel( extensible predicate vulnerableActionsDataModel( string action, string vulnerable_version, string vulnerable_sha, string fixed_version ); + +/** + * Holds for git commands that may introduce untrusted data when called on an attacker controlled branch. + */ +extensible predicate untrustedGitCommandsDataModel(string cmd_regex, string flag); diff --git a/ql/lib/codeql/actions/dataflow/FlowSources.qll b/ql/lib/codeql/actions/dataflow/FlowSources.qll index 4682e7b1abf..f1fb2073ed0 100644 --- a/ql/lib/codeql/actions/dataflow/FlowSources.qll +++ b/ql/lib/codeql/actions/dataflow/FlowSources.qll @@ -64,6 +64,88 @@ class GitHubEventCtxSource extends RemoteFlowSource { override string getSourceType() { result = flag } } +abstract class CommandSource extends RemoteFlowSource { + abstract string getCommand(); + + abstract Run getEnclosingRun(); +} + +class GitCommandSource extends RemoteFlowSource, CommandSource { + Run run; + string cmd; + string flag; + + GitCommandSource() { + exists(Step checkout, string cmd_regex | + // This shoould be: + // source instanceof PRHeadCheckoutStep + // but PRHeadCheckoutStep uses Taint Tracking anc causes a non-Monolitic Recursion error + // so we list all the subclasses of PRHeadCheckoutStep here and use actions/checkout as a workaround + // instead of using ActionsMutableRefCheckout and ActionsSHACheckout + ( + exists(Uses uses | + checkout = uses and + uses.getCallee() = "actions/checkout" and + exists(uses.getArgument("ref")) + ) + or + checkout instanceof GitMutableRefCheckout + or + checkout instanceof GitSHACheckout + or + checkout instanceof GhMutableRefCheckout + or + checkout instanceof GhSHACheckout + ) and + this.asExpr() = run.getScriptScalar() and + checkout.getAFollowingStep() = run and + run.getACommand() = cmd and + cmd.indexOf("git") = 0 and + untrustedGitCommandsDataModel(cmd_regex, flag) and + cmd.regexpMatch(cmd_regex) + ) + } + + override string getSourceType() { result = flag } + + override string getCommand() { result = cmd } + + override Run getEnclosingRun() { result = run } +} + +class GitHubEventPathSource extends RemoteFlowSource, CommandSource { + string cmd; + string flag; + string access_path; + Run run; + + // Examples + // COMMENT_AUTHOR=$(jq -r .comment.user.login "$GITHUB_EVENT_PATH") + // CURRENT_COMMENT=$(jq -r .comment.body "$GITHUB_EVENT_PATH") + // PR_HEAD=$(jq --raw-output .pull_request.head.ref ${GITHUB_EVENT_PATH}) + // PR_NUMBER=$(jq --raw-output .pull_request.number ${GITHUB_EVENT_PATH}) + // PR_TITLE=$(jq --raw-output .pull_request.title ${GITHUB_EVENT_PATH}) + // BODY=$(jq -r '.issue.body' "$GITHUB_EVENT_PATH" | sed -n '3p') + GitHubEventPathSource() { + this.asExpr() = run.getScriptScalar() and + run.getACommand() = cmd and + cmd.matches("jq%") and + cmd.matches("%GITHUB_EVENT_PATH%") and + exists(string regexp | + untrustedEventPropertiesDataModel(regexp, flag) and + not flag = "json" and + access_path = "github.event" + cmd.regexpCapture(".*\\s+([^\\s]+)\\s+.*", 1) and + normalizeExpr(access_path).regexpMatch("(?i)\\s*" + wrapRegexp(regexp) + ".*") + ) + } + + override string getSourceType() { result = flag } + + override string getCommand() { result = cmd } + + override Run getEnclosingRun() { result = run } +} + class GitHubEventJsonSource extends RemoteFlowSource { string flag; @@ -104,10 +186,12 @@ class MaDSource extends RemoteFlowSource { override string getSourceType() { result = sourceType } } +abstract class FileSource extends RemoteFlowSource { } + /** * A downloaded artifact. */ -class ArtifactSource extends RemoteFlowSource { +class ArtifactSource extends RemoteFlowSource, FileSource { ArtifactSource() { this.asExpr() instanceof UntrustedArtifactDownloadStep } override string getSourceType() { result = "artifact" } @@ -116,8 +200,27 @@ class ArtifactSource extends RemoteFlowSource { /** * A file from an untrusted checkout. */ -private class CheckoutSource extends RemoteFlowSource { - CheckoutSource() { this.asExpr() instanceof PRHeadCheckoutStep } +private class CheckoutSource extends RemoteFlowSource, FileSource { + CheckoutSource() { + // This shoould be: + // source instanceof PRHeadCheckoutStep + // but PRHeadCheckoutStep uses Taint Tracking anc causes a non-Monolitic Recursion error + // so we list all the subclasses of PRHeadCheckoutStep here and use actions/checkout as a workaround + // instead of using ActionsMutableRefCheckout and ActionsSHACheckout + exists(Uses u | + this.asExpr() = u and + u.getCallee() = "actions/checkout" and + exists(u.getArgument("ref")) + ) + or + this.asExpr() instanceof GitMutableRefCheckout + or + this.asExpr() instanceof GitSHACheckout + or + this.asExpr() instanceof GhMutableRefCheckout + or + this.asExpr() instanceof GhSHACheckout + } override string getSourceType() { result = "artifact" } } diff --git a/ql/lib/ext/config/untrusted_git_commands.yml b/ql/lib/ext/config/untrusted_git_commands.yml new file mode 100644 index 00000000000..0d6c9e3bfa0 --- /dev/null +++ b/ql/lib/ext/config/untrusted_git_commands.yml @@ -0,0 +1,32 @@ +extensions: + - addsTo: + pack: github/actions-all + extensible: untrustedGitCommandsDataModel + data: + # FILES=$(git diff-tree --no-commit-id --name-only HEAD -r) + - [".*git\\b.*\\bdiff-tree\\b.*", "filename,multiline"] + # CHANGES=$(git --no-pager diff --name-only $NAME | grep -v -f .droneignore); + # CHANGES=$(git diff --name-only) + - [".*git\\b.*\\bdiff\\b.*", "filename,multiline"] + # COMMIT_MESSAGE=$(git log --format=%s -n 1) + - [".*git\\b.*\\blog\\b.*%s.*", "text,online"] + # COMMIT_MESSAGE=$(git log --format=%B -n 1) + - [".*git\\b.*\\blog\\b.*%B.*", "text,multiline"] + # COMMIT_MESSAGE=$(git log --format=oneline) + - [".*git\\b.*\\blog\\b.*oneline.*", "text,oneline"] + # COMMIT_MESSAGE=$(git show -s --format=%B) + # COMMIT_MESSAGE=$(git show -s --format=%s) + - [".*git\\b.*\\bshow\\b.*-s.*%s.*", "text,oneline"] + - [".*git\\b.*\\bshow\\b.*-s.*%B.*", "text,multiline"] + # AUTHOR=$(git log -1 --pretty=format:'%an') + - [".*git\\b.*\\blog\\b.*%an.*", "username,oneline"] + # AUTHOR=$(git show -s --pretty=%an) + - [".*git\\b.*\\bshow\\b.*%an.*", "username,oneline"] + # EMAIL=$(git log -1 --pretty=format:'%ae') + - [".*git\\b.*\\blog\\b.*%ae.*", "email,oneline"] + # EMAIL=$(git show -s --pretty=%ae) + - [".*git\\b.*\\bshow\\b.*%ae.*", "email,oneline"] + # BRANCH=$(git branch --show-current) + - [".*git\\b.*\\bbranch\\b.*\\b--show-current\\b.*", "branch,oneline"] + # BRANCH=$(git rev-parse --abbrev-ref HEAD) + - [".*git\\b.*\\brev-parse\\b.*\\b--abbrev-ref\\b.*", "branch,oneline"]