Merge pull request #19701 from adityasharad/actions/bash-parsing-ranking-performance

Actions: Improve Bash parsing performance on command and string interpolations
This commit is contained in:
Aditya Sharad
2025-06-11 00:59:19 +05:30
committed by GitHub
3 changed files with 183 additions and 42 deletions

View File

@@ -0,0 +1,6 @@
---
category: minorAnalysis
---
* Fixed performance issues in the parsing of Bash scripts in workflow files,
which led to out-of-disk errors when analysing certain workflow files with
complex interpolations of shell commands or quoted strings.

View File

@@ -8,35 +8,64 @@ class BashShellScript extends ShellScript {
)
}
private string lineProducer(int i) {
result = this.getRawScript().regexpReplaceAll("\\\\\\s*\n", "").splitAt("\n", i)
/**
* Gets the line at 0-based index `lineIndex` within this shell script,
* assuming newlines as separators.
*/
private string lineProducer(int lineIndex) {
result = this.getRawScript().regexpReplaceAll("\\\\\\s*\n", "").splitAt("\n", lineIndex)
}
private predicate cmdSubstitutionReplacement(string cmdSubs, string id, int k) {
exists(string line | line = this.lineProducer(k) |
exists(int i, int j |
cmdSubs =
// $() cmd substitution
line.regexpFind("\\$\\((?:[^()]+|\\((?:[^()]+|\\([^()]*\\))*\\))*\\)", i, j)
.regexpReplaceAll("^\\$\\(", "")
.regexpReplaceAll("\\)$", "") and
id = "cmdsubs:" + k + ":" + i + ":" + j
)
or
exists(int i, int j |
// `...` cmd substitution
cmdSubs =
line.regexpFind("\\`[^\\`]+\\`", i, j)
.regexpReplaceAll("^\\`", "")
.regexpReplaceAll("\\`$", "") and
id = "cmd:" + k + ":" + i + ":" + j
)
private predicate cmdSubstitutionReplacement(string command, string id, int lineIndex) {
this.commandInSubstitution(lineIndex, command, id)
or
this.commandInBackticks(lineIndex, command, id)
}
/**
* Holds if there is a command substitution `$(command)` in
* the line at `lineIndex` in the shell script,
* and `id` is a unique identifier for this command.
*/
private predicate commandInSubstitution(int lineIndex, string command, string id) {
exists(int occurrenceIndex, int occurrenceOffset |
command =
// Look for the command inside a $(...) command substitution
this.lineProducer(lineIndex)
.regexpFind("\\$\\((?:[^()]+|\\((?:[^()]+|\\([^()]*\\))*\\))*\\)", occurrenceIndex,
occurrenceOffset)
// trim starting $( - TODO do this in first regex
.regexpReplaceAll("^\\$\\(", "")
// trim ending ) - TODO do this in first regex
.regexpReplaceAll("\\)$", "") and
id = "cmdsubs:" + lineIndex + ":" + occurrenceIndex + ":" + occurrenceOffset
)
}
private predicate rankedCmdSubstitutionReplacements(int i, string old, string new) {
old = rank[i](string old2 | this.cmdSubstitutionReplacement(old2, _, _) | old2) and
this.cmdSubstitutionReplacement(old, new, _)
/**
* Holds if `command` is a command in backticks `` `...` `` in
* the line at `lineIndex` in the shell script,
* and `id` is a unique identifier for this command.
*/
private predicate commandInBackticks(int lineIndex, string command, string id) {
exists(int occurrenceIndex, int occurrenceOffset |
command =
this.lineProducer(lineIndex)
.regexpFind("\\`[^\\`]+\\`", occurrenceIndex, occurrenceOffset)
// trim leading backtick - TODO do this in first regex
.regexpReplaceAll("^\\`", "")
// trim trailing backtick - TODO do this in first regex
.regexpReplaceAll("\\`$", "") and
id = "cmd:" + lineIndex + ":" + occurrenceIndex + ":" + occurrenceOffset
)
}
private predicate rankedCmdSubstitutionReplacements(int i, string command, string commandId) {
// rank commands by their unique IDs
commandId = rank[i](string c, string id | this.cmdSubstitutionReplacement(c, id, _) | id) and
// since we cannot output (command, ID) tuples from the rank operation,
// we need to work out the specific command associated with the resulting ID
this.cmdSubstitutionReplacement(command, commandId, _)
}
private predicate doReplaceCmdSubstitutions(int line, int round, string old, string new) {
@@ -64,31 +93,56 @@ class BashShellScript extends ShellScript {
this.cmdSubstitutionReplacement(result, _, i)
}
/**
* Holds if `quotedStr` is a string in double quotes in
* the line at `lineIndex` in the shell script,
* and `id` is a unique identifier for this quoted string.
*/
private predicate doubleQuotedString(int lineIndex, string quotedStr, string id) {
exists(int occurrenceIndex, int occurrenceOffset |
// double quoted string
quotedStr =
this.cmdSubstitutedLineProducer(lineIndex)
.regexpFind("\"((?:[^\"\\\\]|\\\\.)*)\"", occurrenceIndex, occurrenceOffset) and
id =
"qstr:" + lineIndex + ":" + occurrenceIndex + ":" + occurrenceOffset + ":" +
quotedStr.length() + ":" + quotedStr.regexpReplaceAll("[^a-zA-Z0-9]", "")
)
}
/**
* Holds if `quotedStr` is a string in single quotes in
* the line at `lineIndex` in the shell script,
* and `id` is a unique identifier for this quoted string.
*/
private predicate singleQuotedString(int lineIndex, string quotedStr, string id) {
exists(int occurrenceIndex, int occurrenceOffset |
// single quoted string
quotedStr =
this.cmdSubstitutedLineProducer(lineIndex)
.regexpFind("'((?:\\\\.|[^'\\\\])*)'", occurrenceIndex, occurrenceOffset) and
id =
"qstr:" + lineIndex + ":" + occurrenceIndex + ":" + occurrenceOffset + ":" +
quotedStr.length() + ":" + quotedStr.regexpReplaceAll("[^a-zA-Z0-9]", "")
)
}
private predicate quotedStringReplacement(string quotedStr, string id) {
exists(string line, int k | line = this.cmdSubstitutedLineProducer(k) |
exists(int i, int j |
// double quoted string
quotedStr = line.regexpFind("\"((?:[^\"\\\\]|\\\\.)*)\"", i, j) and
id =
"qstr:" + k + ":" + i + ":" + j + ":" + quotedStr.length() + ":" +
quotedStr.regexpReplaceAll("[^a-zA-Z0-9]", "")
)
exists(int lineIndex |
this.doubleQuotedString(lineIndex, quotedStr, id)
or
exists(int i, int j |
// single quoted string
quotedStr = line.regexpFind("'((?:\\\\.|[^'\\\\])*)'", i, j) and
id =
"qstr:" + k + ":" + i + ":" + j + ":" + quotedStr.length() + ":" +
quotedStr.regexpReplaceAll("[^a-zA-Z0-9]", "")
)
this.singleQuotedString(lineIndex, quotedStr, id)
) and
// Only do this for strings that might otherwise disrupt subsequent parsing
quotedStr.regexpMatch("[\"'].*[$\n\r'\"" + Bash::separator() + "].*[\"']")
}
private predicate rankedQuotedStringReplacements(int i, string old, string new) {
old = rank[i](string old2 | this.quotedStringReplacement(old2, _) | old2) and
this.quotedStringReplacement(old, new)
private predicate rankedQuotedStringReplacements(int i, string quotedString, string quotedStringId) {
// rank quoted strings by their nearly-unique IDs
quotedStringId = rank[i](string s, string id | this.quotedStringReplacement(s, id) | id) and
// since we cannot output (string, ID) tuples from the rank operation,
// we need to work out the specific string associated with the resulting ID
this.quotedStringReplacement(quotedString, quotedStringId)
}
private predicate doReplaceQuotedStrings(int line, int round, string old, string new) {

View File

@@ -0,0 +1,81 @@
name: Workflow with complex interpolation
on:
workflow_dispatch:
inputs:
choice-a:
required: true
type: choice
description: choice-a
default: a1
options:
- a1
- a2
- a3
string-b:
required: false
type: string
description: string-b
string-c:
required: false
type: string
description: string-c
list-d:
required: true
type: string
default: d1 d2
description: list-d whitespace separated
list-e:
required: false
type: string
description: list-e whitespace separated
choice-f:
required: true
type: choice
description: choice-f
options:
- false
- true
env:
DRY_TEST: false
B: ${{ github.event.inputs.string-b }}
jobs:
job:
runs-on: ubuntu-latest
steps:
- name: Produce values
id: produce-values
run: |
echo "region=region" >> $GITHUB_OUTPUT
echo "zone=zone" >> $GITHUB_OUTPUT
- name: Step with complex interpolation
id: complex
env:
CHOICE_A: ${{ github.event.inputs.choice-a }}
STRING_B: ${{ github.event.inputs.string-b }}
STRING_C: ${{ github.event.inputs.string-c }}
LIST_D: ${{ github.event.inputs.list-d }}
LIST_E: ${{ github.event.inputs.list-e }}
CHOICE_F: ${{ github.event.inputs.choice-f }}
REGION: ${{ steps.produce-values.outputs.region }}
ZONE: ${{ steps.produce-values.outputs.zone }}
DRY_TEST_JSON: ${{ fromJSON(env.DRY_TEST) }}
FUNCTION_NAME: my-function
USER_EMAIL: 'example@example.com'
TYPE: type
RANGE: '0-100'
run: |
comma_separated_list_d=$(echo "${LIST_D}" | sed "s/ /\",\"/g")
comma_separated_list_e=$(echo "${LIST_E}" | sed "s/ /\",\"/g")
c1=$(echo "${STRING_C}" | cut -d "-" -f 1)
c2=$(echo "${STRING_C}" | cut -d "-" -f 2)
# Similar commands that use JSON payloads with string interpolation.
response=$(aws lambda invoke --invocation-type RequestResponse --function-name "${FUNCTION_NAME}" --region "${REGION}" --cli-read-timeout 0 --cli-binary-format raw-in-base64-out --payload '{"appName":"my-app","chA":"'"${CHOICE_A}"'","c1":"'"${c1}"'","c2":"'"${c2}"'","a":"${CHOICE_A}","bValue":"${B}","zone":"${ZONE}","userEmail":"'"${USER_EMAIL}"'","region":"${REGION}","range":"${RANGE}","type":"${TYPE}","b":"${STRING_B}","listD":"","listE":"","dryTest":'"${DRY_TEST_JSON}"',"f":"${CHOICE_F}"}' ./config.json --log-type Tail)
response=$(aws lambda invoke --invocation-type RequestResponse --function-name "${FUNCTION_NAME}" --region "${REGION}" --cli-read-timeout 0 --cli-binary-format raw-in-base64-out --payload '{"appName":"my-app","chA":"'"${CHOICE_A}"'","c1":"'"${c1}"'","c2":"'"${c2}"'","a":"${CHOICE_A}","bValue":"${B}","zone":"${ZONE}","userEmail":"'"${USER_EMAIL}"'","region":"${REGION}","range":"${RANGE}","type":"${TYPE}","b":"${STRING_B}","listD":["'"${comma_separated_list_d}"'"],"listE":"","dryTest":'"${DRY_TEST_JSON}"',"f":"${CHOICE_F}"}' ./config.json --log-type Tail)
response=$(aws lambda invoke --invocation-type RequestResponse --function-name "${FUNCTION_NAME}" --region "${REGION}" --cli-read-timeout 0 --cli-binary-format raw-in-base64-out --payload '{"appName":"my-app","chA":"'"${CHOICE_A}"'","c1":"'"${c1}"'","c2":"'"${c2}"'","a":"${CHOICE_A}","bValue":"${B}","zone":"${ZONE}","userEmail":"'"${USER_EMAIL}"'","region":"${REGION}","range":"${RANGE}","type":"${TYPE}","b":"${STRING_B}","listD":["'"${comma_separated_list_d}"'"],"listE":"","dryTest":'"${DRY_TEST_JSON}"',"f":"${CHOICE_F}"}' ./config.json --log-type Tail)
response=$(aws lambda invoke --invocation-type RequestResponse --function-name "${FUNCTION_NAME}" --region "${REGION}" --cli-read-timeout 0 --cli-binary-format raw-in-base64-out --payload '{"appName":"my-app","chA":"'"${CHOICE_A}"'","c1":"'"${c1}"'","c2":"'"${c2}"'","a":"${CHOICE_A}","bValue":"${B}","zone":"${ZONE}","userEmail":"'"${USER_EMAIL}"'","region":"${REGION}","range":"${RANGE}","type":"${TYPE}","b":"${STRING_B}","listD":["'"${comma_separated_list_d}"'"],"listE":"","dryTest":'"${DRY_TEST_JSON}"',"f":"${CHOICE_F}"}' ./config.json --log-type Tail)
response=$(aws lambda invoke --invocation-type RequestResponse --function-name "${FUNCTION_NAME}" --region "${REGION}" --cli-read-timeout 0 --cli-binary-format raw-in-base64-out --payload '{"appName":"my-app","chA":"'"${CHOICE_A}"'","c1":"'"${c1}"'","c2":"'"${c2}"'","a":"${CHOICE_A}","bValue":"${B}","zone":"${ZONE}","userEmail":"'"${USER_EMAIL}"'","region":"${REGION}","range":"${RANGE}","type":"${TYPE}","b":"${STRING_B}","listD":"","listE":["'"${comma_separated_list_e}"'"],"dryTest":'"${DRY_TEST_JSON}"',"f":"${CHOICE_F}"}' ./config.json --log-type Tail)
shell: bash