diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 912ba46c57b..664bfde89bf 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -12,6 +12,30 @@ on: jobs: + build_query_pack: + runs-on: ubuntu-latest-xl + steps: + - uses: actions/checkout@v2 + - name: Find codeql + id: find-codeql + uses: github/codeql-action/init@esbena/ql + with: + languages: javascript # does not matter + - name: Build query pack + run: | + cd ql/src + "${CODEQL}" pack create + cd .codeql/pack/codeql/ql-all/0.0.0 + zip "${PACKZIP}" -r . + env: + CODEQL: ${{ steps.find-codeql.outputs.codeql-path }} + PACKZIP: ${{ runner.temp }}/query-pack.zip + - name: Upload query pack + uses: actions/upload-artifact@v2 + with: + name: query-pack + path: ${{ runner.temp }}/query-pack.zip + # XXX this is mostly an inlined copy of the 'build' job in build.yml build_extractor_pack: strategy: @@ -79,9 +103,11 @@ jobs: analyze: name: Analyze - needs: build_extractor_pack + needs: + - build_query_pack + - build_extractor_pack - runs-on: ubuntu-latest + runs-on: ubuntu-latest-xl permissions: actions: read @@ -89,61 +115,55 @@ jobs: security-events: write steps: - - name: Download pack + - name: Download query pack + uses: actions/download-artifact@v2 + with: + name: query-pack + path: ${{ runner.temp }}/query-pack-artifact + + - name: Download extractor pack uses: actions/download-artifact@v2 with: name: extractor-pack path: ${{ runner.temp }}/extractor-pack-artifact - - name: Unzip pack + - name: Prepare packs + id: prepare-packs run: | set -x - mkdir "${PACKTMP}" - cd "${PACKTMP}" - unzip "${PACKARTIFACT}/*.zip" -d unzipped - cp -r unzipped/ql "${PACK}" + mkdir -p "${COMPLETE_PACK}" "${PACKS_TMP}" + cd "${PACKS_TMP}" + unzip "${QUERY_PACK_ARTIFACT}/*.zip" -d query-pack-artifact-unzipped + cp -r query-pack-artifact-unzipped/. 
"${COMPLETE_PACK}" + unzip "${EXTRACTOR_PACK_ARTIFACT}/*.zip" -d extractor-pack-artifact-unzipped + cp -r extractor-pack-artifact-unzipped/ql/. "${COMPLETE_PACK}" + cd "${COMPLETE_PACK}" + zip "${COMPLETE_PACK_ZIP}" -r . env: - PACKTMP: ${{ runner.temp }}/extractor-pack-artifact.tmp - PACKARTIFACT: ${{ runner.temp }}/extractor-pack-artifact - PACK: ${{ runner.temp }}/extractor-pack - - - name: Checkout repository - uses: actions/checkout@v2 - - - name: Make config file - run: | - set -x - echo "name: CodeQL config for QL" >> "${CONFIG_FILE}" - echo "" >> "${CONFIG_FILE}" - echo "disable-default-queries: true" >> "${CONFIG_FILE}" - echo "" >> "${CONFIG_FILE}" - echo "queries: " >> "${CONFIG_FILE}" - echo " - name: Standard queries" >> "${CONFIG_FILE}" - echo " uses: ${SUITE}" >> "${CONFIG_FILE}" - cat "${CONFIG_FILE}" - env: - SUITE: ./ql/src/codeql-suites/ql-code-scanning.qls - CONFIG_FILE: ./.custom-codeql-actions-config.yml + PACKS_TMP: ${{ runner.temp }}/pack-artifacts.tmp + QUERY_PACK_ARTIFACT: ${{ runner.temp }}/query-pack-artifact + EXTRACTOR_PACK_ARTIFACT: ${{ runner.temp }}/extractor-pack-artifact + COMPLETE_PACK: ${{ runner.temp }}/pack + COMPLETE_PACK_ZIP: ${{ runner.temp }}/pack.zip - name: Hack codeql-action options run: | - JSON=$(jq -nc --arg pack "${PACK}" '.resolve.extractor=["--search-path", $pack] | .database.init=["--search-path", $pack]') + JSON=$(jq -nc --arg pack "${COMPLETE_PACK}" '.resolve.queries=["--search-path", $pack] | .resolve.extractor=["--search-path", $pack] | .database.init=["--search-path", $pack]') echo "CODEQL_ACTION_EXTRA_OPTIONS=${JSON}" >> ${GITHUB_ENV} env: - PACK: ${{ runner.temp }}/extractor-pack + COMPLETE_PACK: ${{ runner.temp }}/pack + + - name: Checkout repository + uses: actions/checkout@v2 - name: Initialize CodeQL uses: github/codeql-action/init@esbena/ql with: languages: ql db-location: ${{ runner.temp }}/db - config-file: ./.custom-codeql-actions-config.yml - name: Perform CodeQL Analysis uses: 
github/codeql-action/analyze@esbena/ql - with: - results: ${{ runner.temp }}/results - add-snippets: true - name: Upload db uses: actions/upload-artifact@v2 @@ -152,9 +172,9 @@ jobs: path: ${{ runner.temp }}/db retention-days: 1 - - name: Upload results + - name: Upload complete pack uses: actions/upload-artifact@v2 with: - name: results - path: ${{ runner.temp }}/results + name: complete-pack + path: ${{ runner.temp }}/pack.zip retention-days: 1 diff --git a/Cargo.lock b/Cargo.lock index 1fbfc0c3097..2b35c05221a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -589,7 +589,7 @@ dependencies = [ [[package]] name = "tree-sitter-ql" version = "0.19.0" -source = "git+https://github.com/tausbn/tree-sitter-ql.git?rev=577c43d96c93915bd7ae9c2765d11be8db102952#577c43d96c93915bd7ae9c2765d11be8db102952" +source = "git+https://github.com/tausbn/tree-sitter-ql.git?rev=36bdc0eae196f9833182ce3f8932be63534121b3#36bdc0eae196f9833182ce3f8932be63534121b3" dependencies = [ "cc", "tree-sitter", diff --git a/codeql-ql.code-workspace b/codeql-ql.code-workspace deleted file mode 100644 index 7610040e098..00000000000 --- a/codeql-ql.code-workspace +++ /dev/null @@ -1,14 +0,0 @@ -{ - "folders": [ - { - "path": "." 
- } - ], - "settings": { - "editor.formatOnSave": true, - "files.eol": "\n", - "files.exclude": { - "codeql": true - } - } -} \ No newline at end of file diff --git a/extractor/Cargo.toml b/extractor/Cargo.toml index 522aa605f47..3ccf1ce3430 100644 --- a/extractor/Cargo.toml +++ b/extractor/Cargo.toml @@ -10,7 +10,7 @@ edition = "2018" flate2 = "1.0" node-types = { path = "../node-types" } tree-sitter = "0.19" -tree-sitter-ql = { git = "https://github.com/tausbn/tree-sitter-ql.git", rev = "577c43d96c93915bd7ae9c2765d11be8db102952" } +tree-sitter-ql = { git = "https://github.com/tausbn/tree-sitter-ql.git", rev = "36bdc0eae196f9833182ce3f8932be63534121b3" } clap = "2.33" tracing = "0.1" tracing-subscriber = { version = "0.2", features = ["env-filter"] } diff --git a/generator/Cargo.toml b/generator/Cargo.toml index c3d9111137f..93117ef1c3e 100644 --- a/generator/Cargo.toml +++ b/generator/Cargo.toml @@ -10,4 +10,4 @@ edition = "2018" node-types = { path = "../node-types" } tracing = "0.1" tracing-subscriber = { version = "0.2", features = ["env-filter"] } -tree-sitter-ql = { git = "https://github.com/tausbn/tree-sitter-ql.git", rev = "577c43d96c93915bd7ae9c2765d11be8db102952" } +tree-sitter-ql = { git = "https://github.com/tausbn/tree-sitter-ql.git", rev = "36bdc0eae196f9833182ce3f8932be63534121b3" } diff --git a/ql/src/codeql-suites/ql-all.qls b/ql/src/codeql-suites/ql-all.qls new file mode 100644 index 00000000000..e4c050bfa98 --- /dev/null +++ b/ql/src/codeql-suites/ql-all.qls @@ -0,0 +1,8 @@ +- description: All Code Scanning queries for QL +- queries: . 
+- include: + kind: + - problem + - path-problem + - alert + - path-alert diff --git a/ql/src/codeql-suites/ql-code-scanning.qls b/ql/src/codeql-suites/ql-code-scanning.qls index ae349649944..f6b2a097b0a 100644 --- a/ql/src/codeql-suites/ql-code-scanning.qls +++ b/ql/src/codeql-suites/ql-code-scanning.qls @@ -6,3 +6,13 @@ - path-problem - alert - path-alert + precision: + - high + - very-high + problem.severity: + - error + - warning +- exclude: + deprecated: // +- exclude: + query path: /^experimental\/.*/ diff --git a/ql/src/codeql/GlobalValueNumbering.qll b/ql/src/codeql/GlobalValueNumbering.qll new file mode 100644 index 00000000000..e9b4728a6be --- /dev/null +++ b/ql/src/codeql/GlobalValueNumbering.qll @@ -0,0 +1,244 @@ +private import ql +private import codeql_ql.ast.internal.Predicate +private import codeql_ql.ast.internal.Type +private import codeql_ql.ast.internal.Builtins + +private newtype TValueNumber = + TVariableValueNumber(VarDecl var) { variableAccessValueNumber(_, var) } or + TFieldValueNumber(VarDecl var) { fieldAccessValueNumber(_, var) } or + TThisValueNumber(Predicate pred) { thisAccessValueNumber(_, pred) } or + TPredicateValueNumber(PredicateOrBuiltin pred, ValueNumberArgumentList args) { + predicateCallValueNumber(_, pred, args) + } or + TClassPredicateValueNumber(PredicateOrBuiltin pred, ValueNumber base, ValueNumberArgumentList args) { + classPredicateCallValueNumber(_, pred, base, args) + } or + TLiteralValueNumber(string value, Type t) { literalValueNumber(_, value, t) } or + TBinaryOpValueNumber(FunctionSymbol symbol, ValueNumber leftOperand, ValueNumber rightOperand) { + binaryOperandValueNumber(_, symbol, leftOperand, rightOperand) + } or + TUnaryOpValueNumber(FunctionSymbol symbol, ValueNumber operand) { + unaryOperandValueNumber(_, symbol, operand) + } or + TInlineCastValueNumber(ValueNumber operand, Type t) { inlineCastValueNumber(_, operand, t) } or + TDontCareValueNumber() or + TRangeValueNumber(ValueNumber lower, ValueNumber 
high) { rangeValueNumber(_, lower, high) } or + TSetValueNumber(ValueNumberElementList elements) { setValueNumber(_, elements) } or + TUniqueValueNumber(Expr e) { uniqueValueNumber(e) } + +private newtype ValueNumberArgumentList = + MkArgsNil() or + MkArgsCons(ValueNumber head, ValueNumberArgumentList tail) { + argumentValueNumbers(_, _, head, tail) + } + +private newtype ValueNumberElementList = + MkElementsNil() or + MkElementsCons(ValueNumber head, ValueNumberElementList tail) { + setValueNumbers(_, _, head, tail) + } + +private ValueNumberArgumentList argumentValueNumbers(Call call, int start) { + start = call.getNumberOfArguments() and + result = MkArgsNil() + or + exists(ValueNumber head, ValueNumberArgumentList tail | + argumentValueNumbers(call, start, head, tail) and + result = MkArgsCons(head, tail) + ) +} + +private predicate argumentValueNumbers( + Call call, int start, ValueNumber head, ValueNumberArgumentList tail +) { + head = valueNumber(call.getArgument(start)) and + tail = argumentValueNumbers(call, start + 1) +} + +private ValueNumberElementList setValueNumbers(Set set, int start) { + start = set.getNumberOfElements() and + result = MkElementsNil() + or + exists(ValueNumber head, ValueNumberElementList tail | + setValueNumbers(set, start, head, tail) and + result = MkElementsCons(head, tail) + ) +} + +private predicate setValueNumbers(Set set, int start, ValueNumber head, ValueNumberElementList tail) { + head = valueNumber(set.getElement(start)) and + tail = setValueNumbers(set, start + 1) +} + +/** + * A value number. A value number represents a collection of expressions that compute to the same value + * at runtime. + */ +class ValueNumber extends TValueNumber { + string toString() { result = "GVN" } + + /** Gets an expression that has this value number. 
*/ + final Expr getAnExpr() { this = valueNumber(result) } +} + +private predicate uniqueValueNumber(Expr e) { not numberable(e) } + +private predicate numberable(Expr e) { + e instanceof VarAccess or + e instanceof FieldAccess or + e instanceof ThisAccess or + e instanceof Call or + e instanceof Literal or + e instanceof BinOpExpr or + e instanceof UnaryExpr or + e instanceof InlineCast or + e instanceof ExprAnnotation or + e instanceof DontCare or + e instanceof Range or + e instanceof Set or + e instanceof AsExpr +} + +private predicate variableAccessValueNumber(VarAccess access, VarDef var) { + access.getDeclaration() = var +} + +private predicate fieldAccessValueNumber(FieldAccess access, VarDef var) { + access.getDeclaration() = var +} + +private predicate thisAccessValueNumber(ThisAccess access, Predicate pred) { + access.getEnclosingPredicate() = pred +} + +private predicate predicateCallValueNumber( + Call call, PredicateOrBuiltin pred, ValueNumberArgumentList args +) { + call.getTarget() = pred and + not exists(call.(MemberCall).getBase()) and + args = argumentValueNumbers(call, 0) +} + +private predicate classPredicateCallValueNumber( + MemberCall call, PredicateOrBuiltin pred, ValueNumber base, ValueNumberArgumentList args +) { + call.getTarget() = pred and + valueNumber(call.getBase()) = base and + args = argumentValueNumbers(call, 0) +} + +private predicate literalValueNumber(Literal lit, string value, Type t) { + lit.(String).getValue() = value and + t instanceof StringClass + or + lit.(Integer).getValue().toString() = value and + t instanceof IntClass + or + lit.(Float).getValue().toString() = value and + t instanceof FloatClass + or + lit.(Boolean).isFalse() and + value = "false" and + t instanceof BooleanClass + or + lit.(Boolean).isTrue() and + value = "true" and + t instanceof BooleanClass +} + +private predicate binaryOperandValueNumber( + BinOpExpr e, FunctionSymbol symbol, ValueNumber leftOperand, ValueNumber rightOperand +) { + 
e.getOperator() = symbol and + valueNumber(e.getLeftOperand()) = leftOperand and + valueNumber(e.getRightOperand()) = rightOperand +} + +private predicate unaryOperandValueNumber(UnaryExpr e, FunctionSymbol symbol, ValueNumber operand) { + e.getOperator() = symbol and + valueNumber(e.getOperand()) = operand +} + +private predicate inlineCastValueNumber(InlineCast cast, ValueNumber operand, Type t) { + valueNumber(cast.getBase()) = operand and + cast.getTypeExpr().getResolvedType() = t +} + +private predicate rangeValueNumber(Range range, ValueNumber lower, ValueNumber high) { + valueNumber(range.getLowEndpoint()) = lower and + valueNumber(range.getHighEndpoint()) = high +} + +private predicate setValueNumber(Set set, ValueNumberElementList elements) { + elements = setValueNumbers(set, 0) +} + +private TValueNumber nonUniqueValueNumber(Expr e) { + exists(VarDecl var | + variableAccessValueNumber(e, var) and + result = TVariableValueNumber(var) + ) + or + exists(VarDecl var | + fieldAccessValueNumber(e, var) and + result = TFieldValueNumber(var) + ) + or + exists(Predicate pred | + thisAccessValueNumber(e, pred) and + result = TThisValueNumber(pred) + ) + or + exists(PredicateOrBuiltin pred, ValueNumberArgumentList args | + predicateCallValueNumber(e, pred, args) and + result = TPredicateValueNumber(pred, args) + ) + or + exists(PredicateOrBuiltin pred, ValueNumber base, ValueNumberArgumentList args | + classPredicateCallValueNumber(e, pred, base, args) and + result = TClassPredicateValueNumber(pred, base, args) + ) + or + exists(string value, Type t | + literalValueNumber(e, value, t) and + result = TLiteralValueNumber(value, t) + ) + or + exists(FunctionSymbol symbol, ValueNumber leftOperand, ValueNumber rightOperand | + binaryOperandValueNumber(e, symbol, leftOperand, rightOperand) and + result = TBinaryOpValueNumber(symbol, leftOperand, rightOperand) + ) + or + exists(FunctionSymbol symbol, ValueNumber operand | + unaryOperandValueNumber(e, symbol, operand) and + 
result = TUnaryOpValueNumber(symbol, operand) + ) + or + exists(ValueNumber operand, Type t | + inlineCastValueNumber(e, operand, t) and + result = TInlineCastValueNumber(operand, t) + ) + or + result = valueNumber([e.(ExprAnnotation).getExpression(), e.(AsExpr).getInnerExpr()]) + or + e instanceof DontCare and result = TDontCareValueNumber() + or + exists(ValueNumber lower, ValueNumber high | + rangeValueNumber(e, lower, high) and + result = TRangeValueNumber(lower, high) + ) + or + exists(ValueNumberElementList elements | + setValueNumber(e, elements) and + result = TSetValueNumber(elements) + ) +} + +/** Gets the value number of an expression `e`. */ +cached +TValueNumber valueNumber(Expr e) { + result = nonUniqueValueNumber(e) + or + uniqueValueNumber(e) and + result = TUniqueValueNumber(e) +} diff --git a/ql/src/codeql_ql/ast/Ast.qll b/ql/src/codeql_ql/ast/Ast.qll index 0d3ee7d1714..2c93da2af47 100644 --- a/ql/src/codeql_ql/ast/Ast.qll +++ b/ql/src/codeql_ql/ast/Ast.qll @@ -4,6 +4,7 @@ private import codeql_ql.ast.internal.Module private import codeql_ql.ast.internal.Predicate import codeql_ql.ast.internal.Type private import codeql_ql.ast.internal.Variable +private import codeql_ql.ast.internal.Builtins bindingset[name] private string directMember(string name) { result = name + "()" } @@ -16,15 +17,6 @@ private string stringIndexedMember(string name, string index) { result = name + "(_)" and exists(index) } -/** - * Holds if `node` has an annotation with `name`. 
- */ -private predicate hasAnnotation(AstNode node, string name) { - exists(Generated::Annotation annotation | annotation.getName().getValue() = name | - toGenerated(node).getParent() = annotation.getParent() - ) -} - /** An AST node of a QL program */ class AstNode extends TAstNode { string toString() { result = getAPrimaryQlClass() } @@ -40,6 +32,20 @@ class AstNode extends TAstNode { ) } + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + if exists(getLocation()) + then getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + else ( + filepath = "" and + startline = 0 and + startcolumn = 0 and + endline = 0 and + endcolumn = 0 + ) + } + /** * Gets the parent in the AST for this node. */ @@ -60,6 +66,12 @@ class AstNode extends TAstNode { /** Gets the QLDoc comment for this AST node, if any. */ QLDoc getQLDoc() { none() } + /** Holds if this AST node has an annotation named `name`. */ + predicate hasAnnotation(string name) { this.getAnAnnotation().getName() = name } + + /** Gets an annotation of this AST node. */ + Annotation getAnAnnotation() { toGenerated(this).getParent() = toGenerated(result).getParent() } + /** * Gets the predicate that contains this AST node. 
*/ @@ -180,11 +192,62 @@ class Select extends TSelect, AstNode { override string getAPrimaryQlClass() { result = "Select" } } +class PredicateOrBuiltin extends TPredOrBuiltin, AstNode { + string getName() { none() } + + Type getDeclaringType() { none() } + + Type getParameterType(int i) { none() } + + Type getReturnType() { none() } + + int getArity() { result = count(getParameterType(_)) } + + predicate isPrivate() { none() } +} + +class BuiltinPredicate extends PredicateOrBuiltin, TBuiltin { + override string toString() { result = getName() } + + override string getAPrimaryQlClass() { result = "BuiltinPredicate" } +} + +private class BuiltinClassless extends BuiltinPredicate, TBuiltinClassless { + string name; + string ret; + string args; + + BuiltinClassless() { this = TBuiltinClassless(ret, name, args) } + + override string getName() { result = name } + + override PrimitiveType getReturnType() { result.getName() = ret } + + override PrimitiveType getParameterType(int i) { result.getName() = getArgType(args, i) } +} + +private class BuiltinMember extends BuiltinPredicate, TBuiltinMember { + string name; + string qual; + string ret; + string args; + + BuiltinMember() { this = TBuiltinMember(qual, ret, name, args) } + + override string getName() { result = name } + + override PrimitiveType getReturnType() { result.getName() = ret } + + override PrimitiveType getParameterType(int i) { result.getName() = getArgType(args, i) } + + override PrimitiveType getDeclaringType() { result.getName() = qual } +} + /** * A QL predicate. * Either a classless predicate, a class predicate, or a characteristic predicate. */ -class Predicate extends TPredicate, AstNode, Declaration { +class Predicate extends TPredicate, AstNode, PredicateOrBuiltin, Declaration { /** * Gets the body of the predicate. */ @@ -203,7 +266,7 @@ class Predicate extends TPredicate, AstNode, Declaration { /** * Gets the number of parameters. 
*/ - int getArity() { + override int getArity() { not this.(ClasslessPredicate).getAlias() instanceof PredicateExpr and result = count(getParameter(_)) or @@ -212,12 +275,19 @@ class Predicate extends TPredicate, AstNode, Declaration { ) } + /** + * Holds if this predicate is private. + */ + override predicate isPrivate() { hasAnnotation("private") } + /** * Gets the return type (if any) of the predicate. */ TypeExpr getReturnTypeExpr() { none() } - Type getReturnType() { result = this.getReturnTypeExpr().getResolvedType() } + override Type getReturnType() { result = this.getReturnTypeExpr().getResolvedType() } + + override Type getParameterType(int i) { result = this.getParameter(i).getType() } override AstNode getAChild(string pred) { result = super.getAChild(pred) @@ -232,6 +302,45 @@ class Predicate extends TPredicate, AstNode, Declaration { override string getAPrimaryQlClass() { result = "Predicate" } } +/** + * A relation in the database. + */ +class Relation extends TDBRelation, AstNode, Declaration { + Generated::DbTable table; + + Relation() { this = TDBRelation(table) } + + /** + * Gets the name of the relation. + */ + override string getName() { result = table.getTableName().getChild().getValue() } + + private Generated::DbColumn getColumn(int i) { + result = + rank[i + 1](Generated::DbColumn column, int child | + table.getChild(child) = column + | + column order by child + ) + } + + /** Gets the `i`th parameter name */ + string getParameterName(int i) { result = getColumn(i).getColName().getValue() } + + /** Gets the `i`th parameter type */ + string getParameterType(int i) { + // TODO: This is just using the name of the type, not the actual type. Checkout Type.qll + result = getColumn(i).getColType().getChild().(Generated::Token).getValue() + } + + /** + * Gets the number of parameters. 
+ */ + int getArity() { result = count(getColumn(_)) } + + override string getAPrimaryQlClass() { result = "Relation" } +} + /** * An expression that refers to a predicate, e.g. `BasicBlock::succ/2`. */ @@ -340,6 +449,8 @@ class ClasslessPredicate extends TClasslessPredicate, Predicate, ModuleDeclarati or pred_name = directMember("getReturnTypeExpr") and result = this.getReturnTypeExpr() } + + override predicate isPrivate() { Predicate.super.isPrivate() } } /** @@ -358,15 +469,10 @@ class ClassPredicate extends TClassPredicate, Predicate { override Class getParent() { result.getAClassPredicate() = this } - /** - * Holds if this predicate is private. - */ - predicate isPrivate() { hasAnnotation(this, "private") } - /** * Holds if this predicate is annotated as overriding another predicate. */ - predicate isOverride() { hasAnnotation(this, "override") } + predicate isOverride() { hasAnnotation("override") } override VarDecl getParameter(int i) { toGenerated(result) = @@ -380,7 +486,7 @@ class ClassPredicate extends TClassPredicate, Predicate { /** * Gets the type representing this class. */ - ClassType getDeclaringType() { result.getDeclaration() = getParent() } + override ClassType getDeclaringType() { result.getDeclaration() = getParent() } predicate overrides(ClassPredicate other) { predOverrides(this, other) } @@ -417,7 +523,7 @@ class CharPred extends TCharPred, Predicate { pred_name = directMember("getBody") and result = this.getBody() } - ClassType getDeclaringType() { result.getDeclaration() = getParent() } + override ClassType getDeclaringType() { result.getDeclaration() = getParent() } } /** @@ -579,7 +685,7 @@ class Module extends TModule, ModuleDeclaration { */ class ModuleMember extends TModuleMember, AstNode { /** Holds if this member is declared as `private`. */ - predicate isPrivate() { hasAnnotation(this, "private") } + predicate isPrivate() { hasAnnotation("private") } } /** A declaration. E.g. a class, type, predicate, newtype... 
*/ @@ -665,7 +771,7 @@ class Class extends TClass, TypeDeclaration, ModuleDeclaration { /** * Gets a super-type referenced in the `extends` part of the class declaration. */ - TypeExpr getASuperType() { toGenerated(result) in [cls.getExtends(_), cls.getInstanceof(_)] } + TypeExpr getASuperType() { toGenerated(result) = cls.getExtends(_) } /** Gets the type that this class is defined to be an alias of. */ TypeExpr getAliasType() { @@ -728,7 +834,7 @@ class NewType extends TNewType, TypeDeclaration, ModuleDeclaration { * A branch in a `newtype`. * E.g. `Bar()` or `Baz()` in `newtype Foo = Bar() or Baz()`. */ -class NewTypeBranch extends TNewTypeBranch, TypeDeclaration { +class NewTypeBranch extends TNewTypeBranch, PredicateOrBuiltin, TypeDeclaration { Generated::DatatypeBranch branch; NewTypeBranch() { this = TNewTypeBranch(branch) } @@ -750,6 +856,16 @@ class NewTypeBranch extends TNewTypeBranch, TypeDeclaration { /** Gets the body of this branch. */ Formula getBody() { toGenerated(result) = branch.getChild(_).(Generated::Body).getChild() } + override NewTypeBranchType getReturnType() { result.getDeclaration() = this } + + override Type getParameterType(int i) { result = this.getField(i).getType() } + + override int getArity() { result = count(this.getField(_)) } + + override Type getDeclaringType() { none() } + + override predicate isPrivate() { this.getNewType().isPrivate() } + override QLDoc getQLDoc() { toGenerated(result) = branch.getChild(_) } NewType getNewType() { result.getABranch() = this } @@ -777,6 +893,9 @@ class Call extends TCall, Expr, Formula { none() // overriden in sublcasses. } + /** Gets an argument of this call, if any. */ + final Expr getAnArgument() { result = getArgument(_) } + PredicateOrBuiltin getTarget() { resolveCall(this, result) } override Type getType() { result = this.getTarget().getReturnType() } @@ -1752,7 +1871,19 @@ class FunctionSymbol extends string { /** * A binary operation expression, such as `x + 3` or `y / 2`. 
*/ -class BinOpExpr extends TBinOpExpr, Expr { } +class BinOpExpr extends TBinOpExpr, Expr { + /** Gets the left operand of the binary expression. */ + Expr getLeftOperand() { none() } // overridden in subclasses + + /** Gets the right operand of the binary expression. */ + Expr getRightOperand() { none() } // overridden in subclasses + + /** Gets the operator of the binary expression. */ + FunctionSymbol getOperator() { none() } // overridden in subclasses + + /** Gets an operand of the binary expression. */ + final Expr getAnOperand() { result = getLeftOperand() or result = getRightOperand() } +} /** * An addition or subtraction expression. @@ -1763,17 +1894,11 @@ class AddSubExpr extends TAddSubExpr, BinOpExpr { AddSubExpr() { this = TAddSubExpr(expr) and operator = expr.getChild().getValue() } - /** Gets the left operand of the binary expression. */ - Expr getLeftOperand() { toGenerated(result) = expr.getLeft() } + override Expr getLeftOperand() { toGenerated(result) = expr.getLeft() } - /* Gets the right operand of the binary expression. */ - Expr getRightOperand() { toGenerated(result) = expr.getRight() } + override Expr getRightOperand() { toGenerated(result) = expr.getRight() } - /* Gets an operand of the binary expression. */ - Expr getAnOperand() { result = getLeftOperand() or result = getRightOperand() } - - /** Gets the operator of the binary expression. */ - FunctionSymbol getOperator() { result = operator } + override FunctionSymbol getOperator() { result = operator } override PrimitiveType getType() { // Both operands are the same type @@ -1833,16 +1958,12 @@ class MulDivModExpr extends TMulDivModExpr, BinOpExpr { MulDivModExpr() { this = TMulDivModExpr(expr) and operator = expr.getChild().getValue() } /** Gets the left operand of the binary expression. */ - Expr getLeftOperand() { toGenerated(result) = expr.getLeft() } + override Expr getLeftOperand() { toGenerated(result) = expr.getLeft() } /** Gets the right operand of the binary expression. 
*/ - Expr getRightOperand() { toGenerated(result) = expr.getRight() } + override Expr getRightOperand() { toGenerated(result) = expr.getRight() } - /** Gets an operand of the binary expression. */ - Expr getAnOperand() { result = getLeftOperand() or result = getRightOperand() } - - /** Gets the operator of the binary expression. */ - FunctionSymbol getOperator() { result = operator } + override FunctionSymbol getOperator() { result = operator } override PrimitiveType getType() { // Both operands are of the same type @@ -1915,6 +2036,11 @@ class Range extends TRange, Expr { */ Expr getHighEndpoint() { toGenerated(result) = range.getUpper() } + /** + * Gets an endpoint of the range: either its low endpoint or its high endpoint. + */ + Expr getAnEndpoint() { result = [getLowEndpoint(), getHighEndpoint()] } + override PrimitiveType getType() { result.getName() = "int" } override string getAPrimaryQlClass() { result = "Range" } @@ -1941,6 +2067,16 @@ class Set extends TSet, Expr { */ Expr getElement(int i) { toGenerated(result) = set.getChild(i) } + /** + * Gets an element in this set literal expression, if any. + */ + Expr getAnElement() { result = getElement(_) } + + /** + * Gets the number of elements in this set literal expression. + */ + int getNumberOfElements() { result = count(getAnElement()) } + override Type getType() { result = this.getElement(0).getType() } override string getAPrimaryQlClass() { result = "Set" } @@ -2030,3 +2166,310 @@ class ModuleExpr extends TModuleExpr, ModuleRef { pred = directMember("getQualifier") and result = this.getQualifier() } } + +/** An argument to an annotation. */ +private class AnnotationArg extends TAnnotationArg, AstNode { + Generated::AnnotArg arg; + + AnnotationArg() { this = TAnnotationArg(arg) } + + /** Gets the name of this argument. 
*/ + string getValue() { + result = + [ + arg.getChild().(Generated::SimpleId).getValue(), + arg.getChild().(Generated::Result).getValue(), arg.getChild().(Generated::This).getValue() + ] + } + + override string toString() { result = this.getValue() } +} + +private class NoInlineArg extends AnnotationArg { + NoInlineArg() { this.getValue() = "noinline" } +} + +private class NoMagicArg extends AnnotationArg { + NoMagicArg() { this.getValue() = "nomagic" } +} + +private class InlineArg extends AnnotationArg { + InlineArg() { this.getValue() = "inline" } +} + +private class NoOptArg extends AnnotationArg { + NoOptArg() { this.getValue() = "noopt" } +} + +private class MonotonicAggregatesArg extends AnnotationArg { + MonotonicAggregatesArg() { this.getValue() = "monotonicAggregates" } +} + +/** An annotation on an element. */ +class Annotation extends TAnnotation, AstNode { + Generated::Annotation annot; + + Annotation() { this = TAnnotation(annot) } + + override string toString() { result = "annotation" } + + override string getAPrimaryQlClass() { result = "Annotation" } + + override Location getLocation() { result = annot.getLocation() } + + /** Gets the node corresponding to the field `args`. */ + AnnotationArg getArgs(int i) { toGenerated(result) = annot.getArgs(i) } + + /** Gets the node corresponding to the field `name`. */ + string getName() { result = annot.getName().getValue() } +} + +/** A `pragma[noinline]` annotation. */ +class NoInline extends Annotation { + NoInline() { this.getArgs(0) instanceof NoInlineArg } + + override string toString() { result = "noinline" } +} + +/** A `pragma[inline]` annotation. */ +class Inline extends Annotation { + Inline() { this.getArgs(0) instanceof InlineArg } + + override string toString() { result = "inline" } +} + +/** A `pragma[nomagic]` annotation. 
*/ +class NoMagic extends Annotation { + NoMagic() { this.getArgs(0) instanceof NoMagicArg } + + override string toString() { result = "nomagic" } +} + +/** A `pragma[noopt]` annotation. */ +class NoOpt extends Annotation { + NoOpt() { this.getArgs(0) instanceof NoOptArg } + + override string toString() { result = "noopt" } +} + +/** A `language[monotonicAggregates]` annotation. */ +class MonotonicAggregates extends Annotation { + MonotonicAggregates() { this.getArgs(0) instanceof MonotonicAggregatesArg } + + override string toString() { result = "monotonicaggregates" } +} + +/** A `bindingset` annotation. */ +class BindingSet extends Annotation { + BindingSet() { this.getName() = "bindingset" } + + /** Gets the `index`'th bound name in this bindingset. */ + string getBoundName(int index) { result = this.getArgs(index).getValue() } + + /** Gets a name bound by this bindingset, if any. */ + string getABoundName() { result = getBoundName(_) } + + /** Gets the number of names bound by this bindingset. */ + int getNumberOfBoundNames() { result = count(getABoundName()) } +} + +/** + * Classes modelling YAML AST nodes. + */ +module YAML { + /** A node in a YAML file. */ + class YAMLNode extends TYAMLNode, AstNode { + /** Holds if this node is a root node, that is, it has no parent. */ + predicate isRoot() { not exists(getParent()) } + } + + /** A YAML comment. */ + class YAMLComment extends TYamlCommemt, YAMLNode { + Generated::YamlComment yamlcomment; + + YAMLComment() { this = TYamlCommemt(yamlcomment) } + + override string getAPrimaryQlClass() { result = "YAMLComment" } + } + + /** A YAML entry. */ + class YAMLEntry extends TYamlEntry, YAMLNode { + Generated::YamlEntry yamle; + + YAMLEntry() { this = TYamlEntry(yamle) } + + /** Gets the key of this YAML entry. */ + YAMLKey getKey() { + exists(Generated::YamlKeyvaluepair pair | + pair.getParent() = yamle and + result = TYamlKey(pair.getKey()) + ) + } + + /** Gets the value of this YAML entry. 
*/ + YAMLValue getValue() { + exists(Generated::YamlKeyvaluepair pair | + pair.getParent() = yamle and + result = TYamlValue(pair.getValue()) + ) + } + + override string getAPrimaryQlClass() { result = "YAMLEntry" } + } + + /** A YAML key. */ + class YAMLKey extends TYamlKey, YAMLNode { + Generated::YamlKey yamlkey; + + YAMLKey() { this = TYamlKey(yamlkey) } + + /** + * Gets the value of this YAML key. + */ + YAMLValue getValue() { + exists(Generated::YamlKeyvaluepair pair | + pair.getKey() = yamlkey and result = TYamlValue(pair.getValue()) + ) + } + + override string getAPrimaryQlClass() { result = "YAMLKey" } + + /** Gets the `i`th (0-based) name part of this YAML key. */ + string getNamePart(int i) { + i = 0 and result = yamlkey.getChild(0).(Generated::SimpleId).getValue() + or + exists(YAMLKey child | + child = TYamlKey(yamlkey.getChild(1)) and + result = child.getNamePart(i - 1) + ) + } + + /** + * Gets all the name parts of this YAML key concatenated with `/`. + * Dashes are replaced with `/` (because we don't have that information in the generated AST). + */ + string getQualifiedName() { + result = concat(string part, int i | part = getNamePart(i) | part, "/" order by i) + } + } + + /** A YAML list item. */ + class YAMLListItem extends TYamlListitem, YAMLNode { + Generated::YamlListitem yamllistitem; + + YAMLListItem() { this = TYamlListitem(yamllistitem) } + + /** + * Gets the value of this YAML list item. + */ + YAMLValue getValue() { result = TYamlValue(yamllistitem.getChild()) } + + override string getAPrimaryQlClass() { result = "YAMLListItem" } + } + + /** A YAML value. */ + class YAMLValue extends TYamlValue, YAMLNode { + Generated::YamlValue yamlvalue; + + YAMLValue() { this = TYamlValue(yamlvalue) } + + override string getAPrimaryQlClass() { result = "YAMLValue" } + + /** Gets the value of this YAML value. */ + string getValue() { result = yamlvalue.getValue() } + } + + // to not expose the entire `File` API on `QlPack`. 
+ private newtype TQLPack = MKQlPack(File file) { file.getBaseName() = "qlpack.yml" } + + YAMLEntry test() { not result.isRoot() } + + /** + * A `qlpack.yml` file. + */ + class QLPack extends MKQlPack { + File file; + + QLPack() { this = MKQlPack(file) } + + private string getProperty(string name) { + exists(YAMLEntry entry | + entry.isRoot() and + entry.getKey().getQualifiedName() = name and + result = entry.getValue().getValue().trim() and + entry.getLocation().getFile() = file + ) + } + + /** Gets the name of this qlpack */ + string getName() { result = getProperty("name") } + + /** Gets the version of this qlpack */ + string getVersion() { result = getProperty("version") } + + /** Gets the extractor of this qlpack */ + string getExtractor() { result = getProperty("extractor") } + + string toString() { result = getName() } + + /** Gets the file that this `QLPack` represents. */ + File getFile() { result = file } + + private predicate isADependency(YAMLEntry entry) { + exists(YAMLEntry deps | + deps.getLocation().getFile() = file and entry.getLocation().getFile() = file + | + deps.isRoot() and + deps.getKey().getQualifiedName() = "dependencies" and + entry.getLocation().getStartLine() = 1 + deps.getLocation().getStartLine() and + entry.getLocation().getStartColumn() > deps.getLocation().getStartColumn() + ) + or + exists(YAMLEntry prev | isADependency(prev) | + prev.getLocation().getFile() = file and + entry.getLocation().getFile() = file and + entry.getLocation().getStartLine() = 1 + prev.getLocation().getStartLine() and + entry.getLocation().getStartColumn() = prev.getLocation().getStartColumn() + ) + } + + predicate hasDependency(string name, string version) { + exists(YAMLEntry entry | isADependency(entry) | + entry.getKey().getQualifiedName() = name and + entry.getValue().getValue() = version + ) + } + + /** Gets the database scheme of this qlpack */ + File getDBScheme() { + result.getBaseName() = getProperty("dbscheme") and + result = 
file.getParentContainer().getFile(any(string s | s.matches("%.dbscheme"))) + } + + pragma[noinline] + Container getAFileInPack() { + result.getParentContainer() = file.getParentContainer() + or + result = getAFileInPack().(Folder).getAChildContainer() + } + + /** + * Gets a QLPack that this QLPack depends on. + */ + QLPack getADependency() { + exists(string name | hasDependency(name, _) | result.getName().replaceAll("-", "/") = name) + } + + Location getLocation() { + // hacky, just pick the first node in the file. + result = + min(YAMLNode entry, Location l, File f | + entry.getLocation().getFile() = file and + f = file and + l = entry.getLocation() + | + entry order by l.getStartLine(), l.getStartColumn(), l.getEndColumn(), l.getEndLine() + ).getLocation() + } + } +} diff --git a/ql/src/codeql_ql/ast/internal/AstNodes.qll b/ql/src/codeql_ql/ast/internal/AstNodes.qll index 11c45279a5e..c7e73105b4c 100644 --- a/ql/src/codeql_ql/ast/internal/AstNodes.qll +++ b/ql/src/codeql_ql/ast/internal/AstNodes.qll @@ -1,5 +1,6 @@ import codeql_ql.ast.Ast as AST import TreeSitter +private import Builtins cached newtype TAstNode = @@ -10,6 +11,7 @@ newtype TAstNode = TClass(Generated::Dataclass dc) or TCharPred(Generated::Charpred pred) or TClassPredicate(Generated::MemberPredicate pred) or + TDBRelation(Generated::DbTable table) or TSelect(Generated::Select sel) or TModule(Generated::Module mod) or TNewType(Generated::Datatype dt) or @@ -57,7 +59,18 @@ newtype TAstNode = TUnaryExpr(Generated::UnaryExpr unaryexpr) or TDontCare(Generated::Underscore dontcare) or TModuleExpr(Generated::ModuleExpr me) or - TPredicateExpr(Generated::PredicateExpr pe) + TPredicateExpr(Generated::PredicateExpr pe) or + TAnnotation(Generated::Annotation annot) or + TAnnotationArg(Generated::AnnotArg arg) or + TYamlCommemt(Generated::YamlComment yc) or + TYamlEntry(Generated::YamlEntry ye) or + TYamlKey(Generated::YamlKey yk) or + TYamlListitem(Generated::YamlListitem yli) or + 
TYamlValue(Generated::YamlValue yv) or + TBuiltinClassless(string ret, string name, string args) { isBuiltinClassless(ret, name, args) } or + TBuiltinMember(string qual, string ret, string name, string args) { + isBuiltinMember(qual, ret, name, args) + } class TFormula = TDisjunction or TConjunction or TComparisonFormula or TQuantifier or TNegation or TIfFormula or @@ -75,6 +88,8 @@ class TCall = TPredicateCall or TMemberCall or TNoneCall or TAnyCall; class TModuleRef = TImport or TModuleExpr; +class TYAMLNode = TYamlCommemt or TYamlEntry or TYamlKey or TYamlListitem or TYamlValue; + private Generated::AstNode toGeneratedFormula(AST::AstNode n) { n = TConjunction(result) or n = TDisjunction(result) or @@ -105,6 +120,14 @@ private Generated::AstNode toGeneratedExpr(AST::AstNode n) { n = TDontCare(result) } +private Generated::AstNode toGenerateYAML(AST::AstNode n) { + n = TYamlCommemt(result) or + n = TYamlEntry(result) or + n = TYamlKey(result) or + n = TYamlListitem(result) or + n = TYamlValue(result) +} + /** * Gets the underlying TreeSitter entity for a given AST node. 
*/ @@ -113,6 +136,8 @@ Generated::AstNode toGenerated(AST::AstNode n) { or result = toGeneratedFormula(n) or + result = toGenerateYAML(n) + or result.(Generated::ParExpr).getChild() = toGenerated(n) or result = @@ -135,6 +160,8 @@ Generated::AstNode toGenerated(AST::AstNode n) { or n = TClassPredicate(result) or + n = TDBRelation(result) + or n = TSelect(result) or n = TModule(result) @@ -164,9 +191,17 @@ Generated::AstNode toGenerated(AST::AstNode n) { n = TAnyCall(result) or n = TSuper(result) + or + n = TAnnotation(result) + or + n = TAnnotationArg(result) } -class TPredicate = TCharPred or TClasslessPredicate or TClassPredicate; +class TPredicate = TCharPred or TClasslessPredicate or TClassPredicate or TDBRelation; + +class TPredOrBuiltin = TPredicate or TNewTypeBranch or TBuiltin; + +class TBuiltin = TBuiltinClassless or TBuiltinMember; class TModuleMember = TModuleDeclaration or TImport or TSelect or TQLDoc; diff --git a/ql/src/codeql_ql/ast/internal/Builtins.qll b/ql/src/codeql_ql/ast/internal/Builtins.qll index 072bb8299f2..9e1d45d5242 100644 --- a/ql/src/codeql_ql/ast/internal/Builtins.qll +++ b/ql/src/codeql_ql/ast/internal/Builtins.qll @@ -65,3 +65,18 @@ string getArgType(string args, int i) { result = args.splitAt(",", i).trim() } class StringClass extends PrimitiveType { StringClass() { this.getName() = "string" } } + +/** The primitive 'int' class. */ +class IntClass extends PrimitiveType { + IntClass() { this.getName() = "int" } +} + +/** The primitive 'float' class. */ +class FloatClass extends PrimitiveType { + FloatClass() { this.getName() = "float" } +} + +/** The primitive 'boolean' class. 
*/ +class BooleanClass extends PrimitiveType { + BooleanClass() { this.getName() = "boolean" } +} diff --git a/ql/src/codeql_ql/ast/internal/Module.qll b/ql/src/codeql_ql/ast/internal/Module.qll index 1443876c30d..a64ccfd0f2d 100644 --- a/ql/src/codeql_ql/ast/internal/Module.qll +++ b/ql/src/codeql_ql/ast/internal/Module.qll @@ -100,8 +100,10 @@ private predicate resolveQualifiedName(Import imp, ContainerOrModule m, int i) { exists(Container c, Container parent | // should ideally look at `qlpack.yml` files parent = imp.getLocation().getFile().getParentContainer+() and - exists(parent.getFile("qlpack.yml")) and - c.getParentContainer() = parent and + exists(YAML::QLPack pack | + pack.getFile().getParentContainer() = parent and + c.getParentContainer() = pack.getADependency*().getFile().getParentContainer() + ) and q = m.getName() | m = TFile(c) diff --git a/ql/src/codeql_ql/ast/internal/Predicate.qll b/ql/src/codeql_ql/ast/internal/Predicate.qll index 5f746237ddc..31c91c7ae6d 100644 --- a/ql/src/codeql_ql/ast/internal/Predicate.qll +++ b/ql/src/codeql_ql/ast/internal/Predicate.qll @@ -1,12 +1,11 @@ import ql private import Builtins private import codeql_ql.ast.internal.Module -private import codeql_ql.ast.internal.AstNodes as AstNodes +private import codeql_ql.ast.internal.AstNodes -private class TClasslessPredicateOrNewTypeBranch = - AstNodes::TClasslessPredicate or AstNodes::TNewTypeBranch; +private class TClasslessPredicateOrNewTypeBranch = TClasslessPredicate or TNewTypeBranch; -string getPredicateName(TClasslessPredicateOrNewTypeBranch p) { +private string getPredicateName(TClasslessPredicateOrNewTypeBranch p) { result = p.(ClasslessPredicate).getName() or result = p.(NewTypeBranch).getName() } @@ -69,8 +68,7 @@ private module Cached { m = pc.getQualifier().getResolvedModule() and public = true | - definesPredicate(m, pc.getPredicateName(), pc.getNumberOfArguments(), p.getDeclaration(), - public) + definesPredicate(m, pc.getPredicateName(), 
pc.getNumberOfArguments(), p, public) ) } @@ -88,148 +86,41 @@ private module Cached { ) } + pragma[noinline] + private predicate candidate(Relation rel, PredicateCall pc) { + rel.getName() = pc.getPredicateName() + } + + private predicate resolveDBRelation(PredicateCall pc, Predicate p) { + exists(Relation rel | p = rel | + candidate(rel, pc) and + rel.getArity() = pc.getNumberOfArguments() and + ( + exists(YAML::QLPack libPack, YAML::QLPack qlPack | + rel.getLocation().getFile() = libPack.getDBScheme() and + qlPack.getADependency*() = libPack and + qlPack.getAFileInPack() = pc.getLocation().getFile() + ) + or + // upgrade scripts don't have a qlpack + rel.getLocation().getFile().getParentContainer() = + pc.getLocation().getFile().getParentContainer() + ) + ) + } + cached predicate resolveCall(Call c, PredicateOrBuiltin p) { resolvePredicateCall(c, p) or resolveMemberCall(c, p) - } - - cached - module NewTypeDef { - cached - newtype TPredOrBuiltin = - TPred(Predicate p) or - TNewTypeBranch(NewTypeBranch b) or - TBuiltinClassless(string ret, string name, string args) { - isBuiltinClassless(ret, name, args) - } or - TBuiltinMember(string qual, string ret, string name, string args) { - isBuiltinMember(qual, ret, name, args) - } + or + not resolvePredicateCall(c, _) and + resolveDBRelation(c, p) } } import Cached -private import NewTypeDef - -class PredicateOrBuiltin extends TPredOrBuiltin { - string getName() { none() } - - string toString() { result = getName() } - - predicate hasLocationInfo( - string filepath, int startline, int startcolumn, int endline, int endcolumn - ) { - if exists(getDeclaration()) - then - getDeclaration() - .getLocation() - .hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) - else ( - filepath = "" and - startline = 0 and - startcolumn = 0 and - endline = 0 and - endcolumn = 0 - ) - } - - AstNode getDeclaration() { none() } - - Type getDeclaringType() { none() } - - Type getParameterType(int i) { none() } - - Type 
getReturnType() { none() } - - int getArity() { result = count(getParameterType(_)) } - - predicate isPrivate() { none() } -} - -private class DefinedPredicate extends PredicateOrBuiltin, TPred { - Predicate decl; - - DefinedPredicate() { this = TPred(decl) } - - override Predicate getDeclaration() { result = decl } - - override string getName() { result = decl.getName() } - - override Type getReturnType() { result = decl.getReturnType() } - - override Type getParameterType(int i) { result = decl.getParameter(i).getType() } - - // Can be removed when all types can be resolved - override int getArity() { result = decl.getArity() } - - override Type getDeclaringType() { - result = decl.(ClassPredicate).getDeclaringType() - or - result = decl.(CharPred).getDeclaringType() - } - - override predicate isPrivate() { - decl.(ClassPredicate).isPrivate() or decl.(ClassPredicate).isPrivate() - } -} - -private class DefinedNewTypeBranch extends PredicateOrBuiltin, TNewTypeBranch { - NewTypeBranch b; - - DefinedNewTypeBranch() { this = TNewTypeBranch(b) } - - override NewTypeBranch getDeclaration() { result = b } - - override string getName() { result = b.getName() } - - override NewTypeBranchType getReturnType() { result.getDeclaration() = b } - - override Type getParameterType(int i) { result = b.getField(i).getType() } - - // Can be removed when all types can be resolved - override int getArity() { result = count(b.getField(_)) } - - override Type getDeclaringType() { none() } - - override predicate isPrivate() { b.getNewType().isPrivate() } -} - -private class TBuiltin = TBuiltinClassless or TBuiltinMember; - -class BuiltinPredicate extends PredicateOrBuiltin, TBuiltin { } - -private class BuiltinClassless extends BuiltinPredicate, TBuiltinClassless { - string name; - string ret; - string args; - - BuiltinClassless() { this = TBuiltinClassless(ret, name, args) } - - override string getName() { result = name } - - override PrimitiveType getReturnType() { result.getName() = 
ret } - - override PrimitiveType getParameterType(int i) { result.getName() = getArgType(args, i) } -} - -private class BuiltinMember extends BuiltinPredicate, TBuiltinMember { - string name; - string qual; - string ret; - string args; - - BuiltinMember() { this = TBuiltinMember(qual, ret, name, args) } - - override string getName() { result = name } - - override PrimitiveType getReturnType() { result.getName() = ret } - - override PrimitiveType getParameterType(int i) { result.getName() = getArgType(args, i) } - - override PrimitiveType getDeclaringType() { result.getName() = qual } -} module PredConsistency { query predicate noResolvePredicateExpr(PredicateExpr pe) { @@ -261,7 +152,7 @@ module PredConsistency { strictcount(PredicateOrBuiltin p0 | resolveCall(call, p0) and // aliases are expected to resolve to multiple. - not exists(p0.getDeclaration().(ClasslessPredicate).getAlias()) + not exists(p0.(ClasslessPredicate).getAlias()) ) and c > 1 and resolveCall(call, p) diff --git a/ql/src/codeql_ql/ast/internal/Type.qll b/ql/src/codeql_ql/ast/internal/Type.qll index 32e1000f426..ee4c5dc8400 100644 --- a/ql/src/codeql_ql/ast/internal/Type.qll +++ b/ql/src/codeql_ql/ast/internal/Type.qll @@ -114,6 +114,7 @@ private PredicateOrBuiltin declaredPred(Type ty, string name, int arity) { result.getArity() = arity } +pragma[nomagic] private PredicateOrBuiltin classPredCandidate(Type ty, string name, int arity) { result = declaredPred(ty, name, arity) or @@ -127,8 +128,7 @@ private PredicateOrBuiltin inherClassPredCandidate(Type ty, string name, int ari } predicate predOverrides(ClassPredicate sub, ClassPredicate sup) { - sup = - inherClassPredCandidate(sub.getDeclaringType(), sub.getName(), sub.getArity()).getDeclaration() + sup = inherClassPredCandidate(sub.getDeclaringType(), sub.getName(), sub.getArity()) } private VarDecl declaredField(ClassType ty, string name) { @@ -283,6 +283,7 @@ private predicate qualifier(TypeExpr te, FileOrModule m, boolean public, string ) } 
+pragma[nomagic] private predicate defines(FileOrModule m, string name, Type t, boolean public) { exists(Class ty | t = TClass(ty) | getEnclosingModule(ty) = m and diff --git a/ql/src/ide-contextual-queries/Definitions.qll b/ql/src/ide-contextual-queries/Definitions.qll index 2588594f14a..eef1eb9b71d 100644 --- a/ql/src/ide-contextual-queries/Definitions.qll +++ b/ql/src/ide-contextual-queries/Definitions.qll @@ -54,7 +54,7 @@ private predicate resolveField(FieldAccess va, VarDecl decl, string kind) { } private predicate resolveCall(Call c, Predicate p, string kind) { - p = c.getTarget().getDeclaration() and + p = c.getTarget() and kind = "call" } diff --git a/ql/src/qlpack.yml b/ql/src/qlpack.yml index 5ee08309518..675067cdcc0 100644 --- a/ql/src/qlpack.yml +++ b/ql/src/qlpack.yml @@ -2,4 +2,5 @@ name: codeql-ql version: 0.0.0 dbscheme: ql.dbscheme suites: codeql-suites +defaultSuiteFile: codeql-suites/ql-code-scanning.qls extractor: ql diff --git a/ql/src/queries/performance/ClassPredicateDoesntMentionThis.ql b/ql/src/queries/performance/ClassPredicateDoesntMentionThis.ql new file mode 100644 index 00000000000..94fe2067486 --- /dev/null +++ b/ql/src/queries/performance/ClassPredicateDoesntMentionThis.ql @@ -0,0 +1,86 @@ +/** + * @name Class predicate doesn't mention `this` + * @description A class predicate that doesn't use `this` (or a field) could instead be a classless predicate, and may cause a cartesian product. 
+ * @kind problem + * @problem.severity warning + * @id ql/class-predicate-doesnt-use-this + * @tags performance + * @precision medium + */ + +import ql + +predicate usesThis(ClassPredicate pred) { + exists(ThisAccess th | th.getEnclosingPredicate() = pred) + or + exists(Super sup | sup.getEnclosingPredicate() = pred) + or + exists(FieldAccess f | f.getEnclosingPredicate() = pred) + or + // implicit this + exists(PredicateCall pc | pc.getEnclosingPredicate() = pred | + pc.getTarget() instanceof ClassPredicate + ) +} + +predicate isLiteralComparison(ComparisonFormula eq) { + exists(Expr lhs, Expr rhs | + eq.getSymbol() = "=" and + eq.getAnOperand() = lhs and + eq.getAnOperand() = rhs and + ( + lhs instanceof ResultAccess + or + lhs instanceof ThisAccess + or + lhs instanceof VarAccess + ) and + ( + rhs instanceof Literal + or + exists(NewTypeBranch nt | + rhs.(Call).getTarget() = nt and + count(nt.getField(_)) = 0 + ) + ) + ) +} + +predicate conjParent(Formula par, Formula child) { child = par.(Conjunction).getAnOperand() } + +predicate isLiteralComparisons(Formula f) { + forex(ComparisonFormula child | conjParent*(f, child) | isLiteralComparison(child)) +} + +predicate isTrivialImplementation(Predicate pred) { + not exists(pred.getBody()) + or + exists(Formula bod | bod = pred.getBody() | + bod instanceof AnyCall + or + bod instanceof NoneCall + or + isLiteralComparisons(bod) + ) +} + +predicate isSingleton(Type ty) { + isTrivialImplementation(ty.(ClassType).getDeclaration().getCharPred()) + or + isSingleton(ty.getASuperType()) + or + exists(NewTypeBranch br | count(br.getField(_)) = 0 | + ty.(NewTypeBranchType).getDeclaration() = br + or + br = unique(NewTypeBranch br2 | br2 = ty.(NewTypeType).getDeclaration().getABranch()) + ) +} + +from ClassPredicate pred +where + not usesThis(pred) and + not isTrivialImplementation(pred) and + not isSingleton(pred.getDeclaringType()) and + not exists(ClassPredicate other | pred.overrides(other) or other.overrides(pred)) and + 
not pred.isOverride() +select pred, "This predicate could be a classless predicate, as it doesn't depend on `this`." diff --git a/ql/src/queries/performance/DontUseGetAQlClass.ql b/ql/src/queries/performance/DontUseGetAQlClass.ql new file mode 100644 index 00000000000..adbcb0baccc --- /dev/null +++ b/ql/src/queries/performance/DontUseGetAQlClass.ql @@ -0,0 +1,22 @@ +/** + * @name Don't use getAQlClass. + * @description Any use of getAQlClass causes both compile-time and runtime to be significantly slower. + * @kind problem + * @problem.severity warning + * @id ql/dont-use-getaqlclass + * @tags performance + * @precision very-high + */ + +import ql + +from Call call +where + ( + call.(PredicateCall).getPredicateName() = "getAQlClass" or + call.(MemberCall).getMemberName() = "getAQlClass" + ) and + not call.getLocation().getFile().getAbsolutePath().matches("%/" + ["meta", "test"] + "/%") and + not call.getLocation().getFile().getBaseName().toLowerCase() = + ["consistency.ql", "test.ql", "tst.ql", "tests.ql"] +select call, "Don't use .getAQlClass" diff --git a/ql/src/queries/performance/MissingNoinline.ql b/ql/src/queries/performance/MissingNoinline.ql new file mode 100644 index 00000000000..bd20ee9e459 --- /dev/null +++ b/ql/src/queries/performance/MissingNoinline.ql @@ -0,0 +1,23 @@ +/** + * @name Missing `noinline` or `nomagic` annotation + * @description When a predicate is factored out to improve join-ordering, it should be marked as `noinline` or `nomagic`. 
+ * @kind problem + * @problem.severity error + * @id ql/missing-noinline + * @tags performance + * @precision high + */ + +import ql + +from QLDoc doc, Predicate decl +where + doc.getContents().matches(["%join order%", "%join-order%"]) and + decl.getQLDoc() = doc and + not decl.getAnAnnotation() instanceof NoInline and + not decl.getAnAnnotation() instanceof NoMagic and + not decl.getAnAnnotation() instanceof NoOpt and + // If it's marked as inline it's probably because the QLDoc says something like + // "this predicate is inlined because it gives a better join-order". + not decl.getAnAnnotation() instanceof Inline +select decl, "This predicate might be inlined." diff --git a/ql/src/queries/style/ImplicitThis.ql b/ql/src/queries/style/ImplicitThis.ql new file mode 100644 index 00000000000..cbc25d2c145 --- /dev/null +++ b/ql/src/queries/style/ImplicitThis.ql @@ -0,0 +1,34 @@ +/** + * @name Using implicit `this` + * @description Writing member predicate calls with an implicit `this` can be confusing + * @kind problem + * @problem.severity recommendation + * @precision very-high + * @id ql/implicit-this + * @tags maintainability + */ + +import ql + +MemberCall explicitThisCallInFile(File f) { + result.getLocation().getFile() = f and + result.getBase() instanceof ThisAccess and + // Exclude `this.(Type).whatever(...)`, as some files have that as their only instance of `this`. + not result = any(InlineCast c).getBase() +} + +PredicateCall implicitThisCallInFile(File f) { + result.getLocation().getFile() = f and + exists(result.getTarget().getDeclaringType().getASuperType()) and + // Exclude `SomeModule::whatever(...)` + not exists(result.getQualifier()) +} + +PredicateCall confusingImplicitThisCall(File f) { + result = implicitThisCallInFile(f) and + exists(explicitThisCallInFile(f)) +} + +from PredicateCall c +where c = confusingImplicitThisCall(_) +select c, "Use of implicit `this`." 
diff --git a/ql/src/queries/style/SuperfluousExists.ql b/ql/src/queries/style/SuperfluousExists.ql new file mode 100644 index 00000000000..84380330fa8 --- /dev/null +++ b/ql/src/queries/style/SuperfluousExists.ql @@ -0,0 +1,96 @@ +/** + * @name Superfluous 'exists' conjunct. + * @description Writing 'exists(x)' when the existence of X is implied by another conjunct is bad practice. + * @kind problem + * @problem.severity warning + * @precision high + * @id ql/superfluous-exists + * @tags maintainability + */ + +import ql +import codeql.GlobalValueNumbering + +/** + * Gets an operand of this conjunction (we need the restriction + * to `Conjunction` to get the correct transitive closure). + */ +Formula getAConjOperand(Conjunction conj) { result = conj.getAnOperand() } + +/** A conjunction that is not an operand of another conjunction. */ +class TopLevelConjunction extends Conjunction { + TopLevelConjunction() { not this = getAConjOperand(_) } + + /** Gets a formula within this conjunction that is not itself a conjunction. */ + Formula getAnAtom() { + not result instanceof Conjunction and + result = getAConjOperand*(this) + } +} + +/** + * Holds if the existence of `e` implies the existence of `vn`. For instance, the existence of + * `1 + x` implies the existence of a value number `vn` such that `vn.getAnExpr() = x`. 
+ */ +predicate exprImpliesExists(ValueNumber vn, Expr e) { + vn.getAnExpr() = e + or + exprImpliesExists(vn, e.(BinOpExpr).getAnOperand()) + or + exprImpliesExists(vn, e.(InlineCast).getBase()) + or + exprImpliesExists(vn, e.(PredicateCall).getAnArgument()) + or + exprImpliesExists(vn, [e.(MemberCall).getAnArgument(), e.(MemberCall).getBase()]) + or + exprImpliesExists(vn, e.(UnaryExpr).getOperand()) + or + exprImpliesExists(vn, e.(ExprAnnotation).getExpression()) + or + forex(Formula child | child = e.(Set).getAnElement() | exprImpliesExists(vn, child)) + or + exprImpliesExists(vn, e.(AsExpr).getInnerExpr()) + or + exists(ExprAggregate agg | + agg = e and + agg.getKind().matches(["strict%", "unique"]) and + exprImpliesExists(vn, agg.getExpr(0)) + ) +} + +/** + * Holds if the satisfiability of `f` implies the existence of `vn`. For instance, if `x.foo()` is + * satisfied, the value number `vn` such that `vn.getAnExpr() = x` exists. + */ +predicate formulaImpliesExists(ValueNumber vn, Formula f) { + forex(Formula child | child = f.(Disjunction).getAnOperand() | formulaImpliesExists(vn, child)) + or + formulaImpliesExists(vn, f.(Conjunction).getAnOperand()) + or + exprImpliesExists(vn, f.(ComparisonFormula).getAnOperand()) + or + exists(IfFormula ifFormula | + ifFormula = f and + formulaImpliesExists(vn, ifFormula.getThenPart()) and + formulaImpliesExists(vn, ifFormula.getElsePart()) + ) + or + exprImpliesExists(vn, f.(InstanceOf).getExpr()) + or + exprImpliesExists(vn, f.(PredicateCall).getAnArgument()) + or + exprImpliesExists(vn, [f.(MemberCall).getAnArgument(), f.(MemberCall).getBase()]) + or + exists(InFormula inFormula | inFormula = f | + exprImpliesExists(vn, [inFormula.getExpr(), inFormula.getRange()]) + ) +} + +from TopLevelConjunction toplevel, Exists existsFormula, ValueNumber vn, Formula conjunct +where + existsFormula = toplevel.getAnAtom() and + vn.getAnExpr() = existsFormula.getExpr() and + conjunct = toplevel.getAnAtom() and + 
formulaImpliesExists(vn, conjunct) +select existsFormula, "This conjunct is superfluous as the existence is implied by $@.", conjunct, + "this conjunct" diff --git a/ql/src/queries/style/docs/ClassDocs.ql b/ql/src/queries/style/docs/ClassDocs.ql new file mode 100644 index 00000000000..1dad0867996 --- /dev/null +++ b/ql/src/queries/style/docs/ClassDocs.ql @@ -0,0 +1,26 @@ +/** + * @name Class QLDoc style. + * @description The QLDoc for a class should start with "A", "An", or "The". + * @kind problem + * @problem.severity warning + * @id ql/class-doc-style + * @tags maintainability + * @precision very-high + */ + +import ql + +bindingset[s] +predicate badStyle(string s) { + not s.replaceAll("/**", "") + .replaceAll("*", "") + .splitAt("\n") + .trim() + .matches(["A %", "An %", "The %", "INTERNAL%", "DEPRECATED%"]) +} + +from Class c +where + badStyle(c.getQLDoc().getContents()) and + not c.isPrivate() +select c.getQLDoc(), "The QLDoc for a class should start with 'A', 'An', or 'The'." diff --git a/ql/src/queries/style/docs/NonUSSpelling.ql b/ql/src/queries/style/docs/NonUSSpelling.ql new file mode 100644 index 00000000000..8861629ca87 --- /dev/null +++ b/ql/src/queries/style/docs/NonUSSpelling.ql @@ -0,0 +1,38 @@ +/** + * @name Non US spelling + * @description QLDocs should use US spelling. 
+ * @kind problem + * @problem.severity warning + * @id ql/non-us-spelling + * @tags maintainability + * @precision very-high + */ + +import ql + +predicate non_us_word(string wrong, string right) { + exists(string s | + wrong = s.splitAt("/", 0) and + right = s.splitAt("/", 1) and + s = ["colour/color", "authorise/authorize", "analyse/analyze"] + ) +} + +bindingset[s] +predicate contains_non_us_spelling(string s, string wrong, string right) { + non_us_word(wrong, right) and + ( + s.matches("%" + wrong + "%") and + wrong != "analyse" + or + // analyses (as a noun) is fine + s.regexpMatch(".*analyse[^s].*") and + wrong = "analyse" + ) +} + +from QLDoc doc, string wrong, string right +where contains_non_us_spelling(doc.getContents().toLowerCase(), wrong, right) +select doc, + "This QLDoc comment contains the non-US spelling '" + wrong + "', which should instead be '" + + right + "'." diff --git a/ql/test/callgraph/callgraph.ql b/ql/test/callgraph/callgraph.ql index ef5aecf1bbc..4b68e49e361 100644 --- a/ql/test/callgraph/callgraph.ql +++ b/ql/test/callgraph/callgraph.ql @@ -1,3 +1,3 @@ import ql -query AstNode getTarget(Call call) { result = call.getTarget().getDeclaration() } +query AstNode getTarget(Call call) { result = call.getTarget() } diff --git a/ql/test/printAst/printAst.expected b/ql/test/printAst/printAst.expected index 5b484e1008d..7b296161d4d 100644 --- a/ql/test/printAst/printAst.expected +++ b/ql/test/printAst/printAst.expected @@ -29,159 +29,239 @@ nodes | Foo.qll:6:30:6:30 | ComparisonOp | semmle.order | 14 | | Foo.qll:6:32:6:36 | String | semmle.label | [String] String | | Foo.qll:6:32:6:36 | String | semmle.order | 15 | +| Foo.qll:9:1:9:5 | annotation | semmle.label | [Annotation] annotation | +| Foo.qll:9:1:9:5 | annotation | semmle.order | 16 | | Foo.qll:9:7:11:1 | ClasslessPredicate foo | semmle.label | [ClasslessPredicate] ClasslessPredicate foo | -| Foo.qll:9:7:11:1 | ClasslessPredicate foo | semmle.order | 16 | +| Foo.qll:9:7:11:1 | 
ClasslessPredicate foo | semmle.order | 17 | | Foo.qll:9:21:9:23 | TypeExpr | semmle.label | [TypeExpr] TypeExpr | -| Foo.qll:9:21:9:23 | TypeExpr | semmle.order | 17 | +| Foo.qll:9:21:9:23 | TypeExpr | semmle.order | 18 | | Foo.qll:9:21:9:25 | f | semmle.label | [VarDecl] f | -| Foo.qll:9:21:9:25 | f | semmle.order | 17 | +| Foo.qll:9:21:9:25 | f | semmle.order | 18 | | Foo.qll:10:3:10:3 | f | semmle.label | [VarAccess] f | -| Foo.qll:10:3:10:3 | f | semmle.order | 19 | +| Foo.qll:10:3:10:3 | f | semmle.order | 20 | | Foo.qll:10:3:10:85 | ComparisonFormula | semmle.label | [ComparisonFormula] ComparisonFormula | -| Foo.qll:10:3:10:85 | ComparisonFormula | semmle.order | 19 | +| Foo.qll:10:3:10:85 | ComparisonFormula | semmle.order | 20 | | Foo.qll:10:5:10:5 | ComparisonOp | semmle.label | [ComparisonOp] ComparisonOp | -| Foo.qll:10:5:10:5 | ComparisonOp | semmle.order | 21 | +| Foo.qll:10:5:10:5 | ComparisonOp | semmle.order | 22 | | Foo.qll:10:7:10:85 | Rank | semmle.label | [Rank] Rank | -| Foo.qll:10:7:10:85 | Rank | semmle.order | 22 | +| Foo.qll:10:7:10:85 | Rank | semmle.order | 23 | | Foo.qll:10:12:10:12 | Integer | semmle.label | [Integer] Integer | -| Foo.qll:10:12:10:12 | Integer | semmle.order | 23 | +| Foo.qll:10:12:10:12 | Integer | semmle.order | 24 | | Foo.qll:10:15:10:17 | TypeExpr | semmle.label | [TypeExpr] TypeExpr | -| Foo.qll:10:15:10:17 | TypeExpr | semmle.order | 24 | +| Foo.qll:10:15:10:17 | TypeExpr | semmle.order | 25 | | Foo.qll:10:15:10:23 | inner | semmle.label | [VarDecl] inner | -| Foo.qll:10:15:10:23 | inner | semmle.order | 24 | +| Foo.qll:10:15:10:23 | inner | semmle.order | 25 | | Foo.qll:10:27:10:31 | inner | semmle.label | [VarAccess] inner | -| Foo.qll:10:27:10:31 | inner | semmle.order | 26 | +| Foo.qll:10:27:10:31 | inner | semmle.order | 27 | | Foo.qll:10:27:10:42 | MemberCall | semmle.label | [MemberCall] MemberCall | -| Foo.qll:10:27:10:42 | MemberCall | semmle.order | 26 | +| Foo.qll:10:27:10:42 | MemberCall | 
semmle.order | 27 | | Foo.qll:10:27:10:50 | ComparisonFormula | semmle.label | [ComparisonFormula] ComparisonFormula | -| Foo.qll:10:27:10:50 | ComparisonFormula | semmle.order | 26 | +| Foo.qll:10:27:10:50 | ComparisonFormula | semmle.order | 27 | | Foo.qll:10:44:10:44 | ComparisonOp | semmle.label | [ComparisonOp] ComparisonOp | -| Foo.qll:10:44:10:44 | ComparisonOp | semmle.order | 29 | +| Foo.qll:10:44:10:44 | ComparisonOp | semmle.order | 30 | | Foo.qll:10:46:10:50 | String | semmle.label | [String] String | -| Foo.qll:10:46:10:50 | String | semmle.order | 30 | +| Foo.qll:10:46:10:50 | String | semmle.order | 31 | | Foo.qll:10:54:10:58 | inner | semmle.label | [VarAccess] inner | -| Foo.qll:10:54:10:58 | inner | semmle.order | 31 | +| Foo.qll:10:54:10:58 | inner | semmle.order | 32 | | Foo.qll:10:69:10:73 | inner | semmle.label | [VarAccess] inner | -| Foo.qll:10:69:10:73 | inner | semmle.order | 32 | +| Foo.qll:10:69:10:73 | inner | semmle.order | 33 | | Foo.qll:10:69:10:84 | MemberCall | semmle.label | [MemberCall] MemberCall | -| Foo.qll:10:69:10:84 | MemberCall | semmle.order | 32 | +| Foo.qll:10:69:10:84 | MemberCall | semmle.order | 33 | | Foo.qll:13:1:27:1 | ClasslessPredicate calls | semmle.label | [ClasslessPredicate] ClasslessPredicate calls | -| Foo.qll:13:1:27:1 | ClasslessPredicate calls | semmle.order | 34 | +| Foo.qll:13:1:27:1 | ClasslessPredicate calls | semmle.order | 35 | | Foo.qll:13:17:13:19 | TypeExpr | semmle.label | [TypeExpr] TypeExpr | -| Foo.qll:13:17:13:19 | TypeExpr | semmle.order | 35 | +| Foo.qll:13:17:13:19 | TypeExpr | semmle.order | 36 | | Foo.qll:13:17:13:21 | f | semmle.label | [VarDecl] f | -| Foo.qll:13:17:13:21 | f | semmle.order | 35 | +| Foo.qll:13:17:13:21 | f | semmle.order | 36 | | Foo.qll:14:3:14:10 | PredicateCall | semmle.label | [PredicateCall] PredicateCall | -| Foo.qll:14:3:14:10 | PredicateCall | semmle.order | 37 | +| Foo.qll:14:3:14:10 | PredicateCall | semmle.order | 38 | | Foo.qll:14:3:16:29 | Disjunction 
| semmle.label | [Disjunction] Disjunction | -| Foo.qll:14:3:16:29 | Disjunction | semmle.order | 37 | +| Foo.qll:14:3:16:29 | Disjunction | semmle.order | 38 | | Foo.qll:14:3:18:28 | Disjunction | semmle.label | [Disjunction] Disjunction | -| Foo.qll:14:3:18:28 | Disjunction | semmle.order | 37 | +| Foo.qll:14:3:18:28 | Disjunction | semmle.order | 38 | | Foo.qll:14:3:20:13 | Disjunction | semmle.label | [Disjunction] Disjunction | -| Foo.qll:14:3:20:13 | Disjunction | semmle.order | 37 | +| Foo.qll:14:3:20:13 | Disjunction | semmle.order | 38 | | Foo.qll:14:3:22:16 | Disjunction | semmle.label | [Disjunction] Disjunction | -| Foo.qll:14:3:22:16 | Disjunction | semmle.order | 37 | +| Foo.qll:14:3:22:16 | Disjunction | semmle.order | 38 | | Foo.qll:14:3:24:23 | Disjunction | semmle.label | [Disjunction] Disjunction | -| Foo.qll:14:3:24:23 | Disjunction | semmle.order | 37 | +| Foo.qll:14:3:24:23 | Disjunction | semmle.order | 38 | | Foo.qll:14:3:26:14 | Disjunction | semmle.label | [Disjunction] Disjunction | -| Foo.qll:14:3:26:14 | Disjunction | semmle.order | 37 | +| Foo.qll:14:3:26:14 | Disjunction | semmle.order | 38 | | Foo.qll:14:9:14:9 | f | semmle.label | [VarAccess] f | -| Foo.qll:14:9:14:9 | f | semmle.order | 44 | +| Foo.qll:14:9:14:9 | f | semmle.order | 45 | | Foo.qll:16:3:16:7 | String | semmle.label | [String] String | -| Foo.qll:16:3:16:7 | String | semmle.order | 45 | +| Foo.qll:16:3:16:7 | String | semmle.order | 46 | | Foo.qll:16:3:16:29 | ComparisonFormula | semmle.label | [ComparisonFormula] ComparisonFormula | -| Foo.qll:16:3:16:29 | ComparisonFormula | semmle.order | 45 | +| Foo.qll:16:3:16:29 | ComparisonFormula | semmle.order | 46 | | Foo.qll:16:9:16:9 | ComparisonOp | semmle.label | [ComparisonOp] ComparisonOp | -| Foo.qll:16:9:16:9 | ComparisonOp | semmle.order | 47 | +| Foo.qll:16:9:16:9 | ComparisonOp | semmle.order | 48 | | Foo.qll:16:11:16:11 | f | semmle.label | [VarAccess] f | -| Foo.qll:16:11:16:11 | f | semmle.order | 48 | +| 
Foo.qll:16:11:16:11 | f | semmle.order | 49 | | Foo.qll:16:11:16:29 | MemberCall | semmle.label | [MemberCall] MemberCall | -| Foo.qll:16:11:16:29 | MemberCall | semmle.order | 48 | +| Foo.qll:16:11:16:29 | MemberCall | semmle.order | 49 | | Foo.qll:16:22:16:22 | Integer | semmle.label | [Integer] Integer | -| Foo.qll:16:22:16:22 | Integer | semmle.order | 50 | +| Foo.qll:16:22:16:22 | Integer | semmle.order | 51 | | Foo.qll:16:25:16:25 | Integer | semmle.label | [Integer] Integer | -| Foo.qll:16:25:16:25 | Integer | semmle.order | 51 | +| Foo.qll:16:25:16:25 | Integer | semmle.order | 52 | | Foo.qll:16:28:16:28 | Integer | semmle.label | [Integer] Integer | -| Foo.qll:16:28:16:28 | Integer | semmle.order | 52 | +| Foo.qll:16:28:16:28 | Integer | semmle.order | 53 | | Foo.qll:18:3:18:3 | f | semmle.label | [VarAccess] f | -| Foo.qll:18:3:18:3 | f | semmle.order | 53 | +| Foo.qll:18:3:18:3 | f | semmle.order | 54 | | Foo.qll:18:3:18:9 | InlineCast | semmle.label | [InlineCast] InlineCast | -| Foo.qll:18:3:18:9 | InlineCast | semmle.order | 53 | +| Foo.qll:18:3:18:9 | InlineCast | semmle.order | 54 | | Foo.qll:18:3:18:20 | MemberCall | semmle.label | [MemberCall] MemberCall | -| Foo.qll:18:3:18:20 | MemberCall | semmle.order | 53 | +| Foo.qll:18:3:18:20 | MemberCall | semmle.order | 54 | | Foo.qll:18:3:18:28 | ComparisonFormula | semmle.label | [ComparisonFormula] ComparisonFormula | -| Foo.qll:18:3:18:28 | ComparisonFormula | semmle.order | 53 | +| Foo.qll:18:3:18:28 | ComparisonFormula | semmle.order | 54 | | Foo.qll:18:6:18:8 | TypeExpr | semmle.label | [TypeExpr] TypeExpr | -| Foo.qll:18:6:18:8 | TypeExpr | semmle.order | 57 | +| Foo.qll:18:6:18:8 | TypeExpr | semmle.order | 58 | | Foo.qll:18:22:18:22 | ComparisonOp | semmle.label | [ComparisonOp] ComparisonOp | -| Foo.qll:18:22:18:22 | ComparisonOp | semmle.order | 58 | +| Foo.qll:18:22:18:22 | ComparisonOp | semmle.order | 59 | | Foo.qll:18:24:18:28 | String | semmle.label | [String] String | -| 
Foo.qll:18:24:18:28 | String | semmle.order | 59 | +| Foo.qll:18:24:18:28 | String | semmle.order | 60 | | Foo.qll:20:3:20:3 | f | semmle.label | [VarAccess] f | -| Foo.qll:20:3:20:3 | f | semmle.order | 60 | +| Foo.qll:20:3:20:3 | f | semmle.order | 61 | | Foo.qll:20:3:20:9 | InlineCast | semmle.label | [InlineCast] InlineCast | -| Foo.qll:20:3:20:9 | InlineCast | semmle.order | 60 | +| Foo.qll:20:3:20:9 | InlineCast | semmle.order | 61 | | Foo.qll:20:3:20:13 | ComparisonFormula | semmle.label | [ComparisonFormula] ComparisonFormula | -| Foo.qll:20:3:20:13 | ComparisonFormula | semmle.order | 60 | +| Foo.qll:20:3:20:13 | ComparisonFormula | semmle.order | 61 | | Foo.qll:20:6:20:8 | TypeExpr | semmle.label | [TypeExpr] TypeExpr | -| Foo.qll:20:6:20:8 | TypeExpr | semmle.order | 63 | +| Foo.qll:20:6:20:8 | TypeExpr | semmle.order | 64 | | Foo.qll:20:11:20:11 | ComparisonOp | semmle.label | [ComparisonOp] ComparisonOp | -| Foo.qll:20:11:20:11 | ComparisonOp | semmle.order | 64 | +| Foo.qll:20:11:20:11 | ComparisonOp | semmle.order | 65 | | Foo.qll:20:13:20:13 | f | semmle.label | [VarAccess] f | -| Foo.qll:20:13:20:13 | f | semmle.order | 65 | +| Foo.qll:20:13:20:13 | f | semmle.order | 66 | | Foo.qll:22:3:22:3 | f | semmle.label | [VarAccess] f | -| Foo.qll:22:3:22:3 | f | semmle.order | 66 | +| Foo.qll:22:3:22:3 | f | semmle.order | 67 | | Foo.qll:22:3:22:16 | ComparisonFormula | semmle.label | [ComparisonFormula] ComparisonFormula | -| Foo.qll:22:3:22:16 | ComparisonFormula | semmle.order | 66 | +| Foo.qll:22:3:22:16 | ComparisonFormula | semmle.order | 67 | | Foo.qll:22:5:22:5 | ComparisonOp | semmle.label | [ComparisonOp] ComparisonOp | -| Foo.qll:22:5:22:5 | ComparisonOp | semmle.order | 68 | +| Foo.qll:22:5:22:5 | ComparisonOp | semmle.order | 69 | | Foo.qll:22:7:22:16 | FullAggregate[any] | semmle.label | [FullAggregate[any]] FullAggregate[any] | -| Foo.qll:22:7:22:16 | FullAggregate[any] | semmle.order | 69 | +| Foo.qll:22:7:22:16 | FullAggregate[any] | 
semmle.order | 70 | | Foo.qll:22:11:22:13 | TypeExpr | semmle.label | [TypeExpr] TypeExpr | -| Foo.qll:22:11:22:13 | TypeExpr | semmle.order | 70 | +| Foo.qll:22:11:22:13 | TypeExpr | semmle.order | 71 | | Foo.qll:22:11:22:15 | f | semmle.label | [VarDecl] f | -| Foo.qll:22:11:22:15 | f | semmle.order | 70 | +| Foo.qll:22:11:22:15 | f | semmle.order | 71 | | Foo.qll:24:3:24:3 | Integer | semmle.label | [Integer] Integer | -| Foo.qll:24:3:24:3 | Integer | semmle.order | 72 | +| Foo.qll:24:3:24:3 | Integer | semmle.order | 73 | | Foo.qll:24:3:24:23 | ComparisonFormula | semmle.label | [ComparisonFormula] ComparisonFormula | -| Foo.qll:24:3:24:23 | ComparisonFormula | semmle.order | 72 | +| Foo.qll:24:3:24:23 | ComparisonFormula | semmle.order | 73 | | Foo.qll:24:5:24:5 | ComparisonOp | semmle.label | [ComparisonOp] ComparisonOp | -| Foo.qll:24:5:24:5 | ComparisonOp | semmle.order | 74 | +| Foo.qll:24:5:24:5 | ComparisonOp | semmle.order | 75 | | Foo.qll:24:7:24:7 | Integer | semmle.label | [Integer] Integer | -| Foo.qll:24:7:24:7 | Integer | semmle.order | 75 | +| Foo.qll:24:7:24:7 | Integer | semmle.order | 76 | | Foo.qll:24:7:24:23 | AddExpr | semmle.label | [AddExpr] AddExpr | -| Foo.qll:24:7:24:23 | AddExpr | semmle.order | 75 | +| Foo.qll:24:7:24:23 | AddExpr | semmle.order | 76 | | Foo.qll:24:12:24:12 | Integer | semmle.label | [Integer] Integer | -| Foo.qll:24:12:24:12 | Integer | semmle.order | 77 | +| Foo.qll:24:12:24:12 | Integer | semmle.order | 78 | | Foo.qll:24:12:24:22 | AddExpr | semmle.label | [AddExpr] AddExpr | -| Foo.qll:24:12:24:22 | AddExpr | semmle.order | 77 | +| Foo.qll:24:12:24:22 | AddExpr | semmle.order | 78 | | Foo.qll:24:17:24:17 | Integer | semmle.label | [Integer] Integer | -| Foo.qll:24:17:24:17 | Integer | semmle.order | 79 | +| Foo.qll:24:17:24:17 | Integer | semmle.order | 80 | | Foo.qll:24:17:24:21 | AddExpr | semmle.label | [AddExpr] AddExpr | -| Foo.qll:24:17:24:21 | AddExpr | semmle.order | 79 | +| Foo.qll:24:17:24:21 | AddExpr 
| semmle.order | 80 | | Foo.qll:24:21:24:21 | Integer | semmle.label | [Integer] Integer | -| Foo.qll:24:21:24:21 | Integer | semmle.order | 81 | +| Foo.qll:24:21:24:21 | Integer | semmle.order | 82 | | Foo.qll:26:3:26:6 | Boolean | semmle.label | [Boolean] Boolean | -| Foo.qll:26:3:26:6 | Boolean | semmle.order | 82 | +| Foo.qll:26:3:26:6 | Boolean | semmle.order | 83 | | Foo.qll:26:3:26:14 | ComparisonFormula | semmle.label | [ComparisonFormula] ComparisonFormula | -| Foo.qll:26:3:26:14 | ComparisonFormula | semmle.order | 82 | +| Foo.qll:26:3:26:14 | ComparisonFormula | semmle.order | 83 | | Foo.qll:26:8:26:8 | ComparisonOp | semmle.label | [ComparisonOp] ComparisonOp | -| Foo.qll:26:8:26:8 | ComparisonOp | semmle.order | 84 | +| Foo.qll:26:8:26:8 | ComparisonOp | semmle.order | 85 | | Foo.qll:26:10:26:14 | Boolean | semmle.label | [Boolean] Boolean | -| Foo.qll:26:10:26:14 | Boolean | semmle.order | 85 | +| Foo.qll:26:10:26:14 | Boolean | semmle.order | 86 | +| file://:0:0:0:0 | abs | semmle.label | [BuiltinPredicate] abs | +| file://:0:0:0:0 | abs | semmle.label | [BuiltinPredicate] abs | +| file://:0:0:0:0 | acos | semmle.label | [BuiltinPredicate] acos | +| file://:0:0:0:0 | any | semmle.label | [BuiltinPredicate] any | +| file://:0:0:0:0 | atan | semmle.label | [BuiltinPredicate] atan | +| file://:0:0:0:0 | bitAnd | semmle.label | [BuiltinPredicate] bitAnd | +| file://:0:0:0:0 | bitNot | semmle.label | [BuiltinPredicate] bitNot | +| file://:0:0:0:0 | bitOr | semmle.label | [BuiltinPredicate] bitOr | +| file://:0:0:0:0 | bitShiftLeft | semmle.label | [BuiltinPredicate] bitShiftLeft | +| file://:0:0:0:0 | bitShiftRight | semmle.label | [BuiltinPredicate] bitShiftRight | +| file://:0:0:0:0 | bitShiftRightSigned | semmle.label | [BuiltinPredicate] bitShiftRightSigned | +| file://:0:0:0:0 | bitXor | semmle.label | [BuiltinPredicate] bitXor | +| file://:0:0:0:0 | booleanAnd | semmle.label | [BuiltinPredicate] booleanAnd | +| file://:0:0:0:0 | booleanNot | 
semmle.label | [BuiltinPredicate] booleanNot | +| file://:0:0:0:0 | booleanOr | semmle.label | [BuiltinPredicate] booleanOr | +| file://:0:0:0:0 | booleanXor | semmle.label | [BuiltinPredicate] booleanXor | +| file://:0:0:0:0 | ceil | semmle.label | [BuiltinPredicate] ceil | +| file://:0:0:0:0 | charAt | semmle.label | [BuiltinPredicate] charAt | +| file://:0:0:0:0 | copySign | semmle.label | [BuiltinPredicate] copySign | +| file://:0:0:0:0 | cos | semmle.label | [BuiltinPredicate] cos | +| file://:0:0:0:0 | cosh | semmle.label | [BuiltinPredicate] cosh | +| file://:0:0:0:0 | daysTo | semmle.label | [BuiltinPredicate] daysTo | +| file://:0:0:0:0 | exp | semmle.label | [BuiltinPredicate] exp | +| file://:0:0:0:0 | floor | semmle.label | [BuiltinPredicate] floor | +| file://:0:0:0:0 | gcd | semmle.label | [BuiltinPredicate] gcd | +| file://:0:0:0:0 | getDay | semmle.label | [BuiltinPredicate] getDay | +| file://:0:0:0:0 | getHours | semmle.label | [BuiltinPredicate] getHours | +| file://:0:0:0:0 | getMinutes | semmle.label | [BuiltinPredicate] getMinutes | +| file://:0:0:0:0 | getMonth | semmle.label | [BuiltinPredicate] getMonth | +| file://:0:0:0:0 | getSeconds | semmle.label | [BuiltinPredicate] getSeconds | +| file://:0:0:0:0 | getYear | semmle.label | [BuiltinPredicate] getYear | +| file://:0:0:0:0 | indexOf | semmle.label | [BuiltinPredicate] indexOf | +| file://:0:0:0:0 | indexOf | semmle.label | [BuiltinPredicate] indexOf | +| file://:0:0:0:0 | isLowercase | semmle.label | [BuiltinPredicate] isLowercase | +| file://:0:0:0:0 | isUppercase | semmle.label | [BuiltinPredicate] isUppercase | +| file://:0:0:0:0 | length | semmle.label | [BuiltinPredicate] length | +| file://:0:0:0:0 | log | semmle.label | [BuiltinPredicate] log | +| file://:0:0:0:0 | log | semmle.label | [BuiltinPredicate] log | +| file://:0:0:0:0 | log2 | semmle.label | [BuiltinPredicate] log2 | +| file://:0:0:0:0 | log10 | semmle.label | [BuiltinPredicate] log10 | +| file://:0:0:0:0 | matches | 
semmle.label | [BuiltinPredicate] matches | +| file://:0:0:0:0 | maximum | semmle.label | [BuiltinPredicate] maximum | +| file://:0:0:0:0 | minimum | semmle.label | [BuiltinPredicate] minimum | +| file://:0:0:0:0 | nextAfter | semmle.label | [BuiltinPredicate] nextAfter | +| file://:0:0:0:0 | nextDown | semmle.label | [BuiltinPredicate] nextDown | +| file://:0:0:0:0 | nextUp | semmle.label | [BuiltinPredicate] nextUp | +| file://:0:0:0:0 | none | semmle.label | [BuiltinPredicate] none | +| file://:0:0:0:0 | pow | semmle.label | [BuiltinPredicate] pow | +| file://:0:0:0:0 | prefix | semmle.label | [BuiltinPredicate] prefix | +| file://:0:0:0:0 | regexpMatch | semmle.label | [BuiltinPredicate] regexpMatch | +| file://:0:0:0:0 | regexpReplaceAll | semmle.label | [BuiltinPredicate] regexpReplaceAll | +| file://:0:0:0:0 | replaceAll | semmle.label | [BuiltinPredicate] replaceAll | +| file://:0:0:0:0 | signum | semmle.label | [BuiltinPredicate] signum | +| file://:0:0:0:0 | sin | semmle.label | [BuiltinPredicate] sin | +| file://:0:0:0:0 | sinh | semmle.label | [BuiltinPredicate] sinh | +| file://:0:0:0:0 | splitAt | semmle.label | [BuiltinPredicate] splitAt | +| file://:0:0:0:0 | splitAt | semmle.label | [BuiltinPredicate] splitAt | +| file://:0:0:0:0 | sqrt | semmle.label | [BuiltinPredicate] sqrt | +| file://:0:0:0:0 | substring | semmle.label | [BuiltinPredicate] substring | +| file://:0:0:0:0 | suffix | semmle.label | [BuiltinPredicate] suffix | +| file://:0:0:0:0 | tan | semmle.label | [BuiltinPredicate] tan | +| file://:0:0:0:0 | tanh | semmle.label | [BuiltinPredicate] tanh | +| file://:0:0:0:0 | toDate | semmle.label | [BuiltinPredicate] toDate | +| file://:0:0:0:0 | toFloat | semmle.label | [BuiltinPredicate] toFloat | +| file://:0:0:0:0 | toISO | semmle.label | [BuiltinPredicate] toISO | +| file://:0:0:0:0 | toInt | semmle.label | [BuiltinPredicate] toInt | +| file://:0:0:0:0 | toLowerCase | semmle.label | [BuiltinPredicate] toLowerCase | +| file://:0:0:0:0 | 
toString | semmle.label | [BuiltinPredicate] toString | +| file://:0:0:0:0 | toString | semmle.label | [BuiltinPredicate] toString | +| file://:0:0:0:0 | toString | semmle.label | [BuiltinPredicate] toString | +| file://:0:0:0:0 | toString | semmle.label | [BuiltinPredicate] toString | +| file://:0:0:0:0 | toString | semmle.label | [BuiltinPredicate] toString | +| file://:0:0:0:0 | toUnicode | semmle.label | [BuiltinPredicate] toUnicode | +| file://:0:0:0:0 | toUpperCase | semmle.label | [BuiltinPredicate] toUpperCase | +| file://:0:0:0:0 | toUrl | semmle.label | [BuiltinPredicate] toUrl | +| file://:0:0:0:0 | toUrl | semmle.label | [BuiltinPredicate] toUrl | +| file://:0:0:0:0 | trim | semmle.label | [BuiltinPredicate] trim | +| file://:0:0:0:0 | ulp | semmle.label | [BuiltinPredicate] ulp | | printAst.ql:1:1:1:28 | Import | semmle.label | [Import] Import | -| printAst.ql:1:1:1:28 | Import | semmle.order | 86 | +| printAst.ql:1:1:1:28 | Import | semmle.order | 87 | | printAst.ql:1:1:1:29 | TopLevel | semmle.label | [TopLevel] TopLevel | -| printAst.ql:1:1:1:29 | TopLevel | semmle.order | 86 | +| printAst.ql:1:1:1:29 | TopLevel | semmle.order | 87 | edges | Foo.qll:1:1:27:2 | TopLevel | Foo.qll:1:1:1:17 | Import | semmle.label | getAnImport() | | Foo.qll:1:1:27:2 | TopLevel | Foo.qll:1:1:1:17 | Import | semmle.order | 1 | | Foo.qll:1:1:27:2 | TopLevel | Foo.qll:3:1:7:1 | Class Foo | semmle.label | getAClass() | | Foo.qll:1:1:27:2 | TopLevel | Foo.qll:3:1:7:1 | Class Foo | semmle.order | 3 | | Foo.qll:1:1:27:2 | TopLevel | Foo.qll:9:7:11:1 | ClasslessPredicate foo | semmle.label | getAPredicate() | -| Foo.qll:1:1:27:2 | TopLevel | Foo.qll:9:7:11:1 | ClasslessPredicate foo | semmle.order | 16 | +| Foo.qll:1:1:27:2 | TopLevel | Foo.qll:9:7:11:1 | ClasslessPredicate foo | semmle.order | 17 | | Foo.qll:1:1:27:2 | TopLevel | Foo.qll:13:1:27:1 | ClasslessPredicate calls | semmle.label | getAPredicate() | -| Foo.qll:1:1:27:2 | TopLevel | Foo.qll:13:1:27:1 | 
ClasslessPredicate calls | semmle.order | 34 | +| Foo.qll:1:1:27:2 | TopLevel | Foo.qll:13:1:27:1 | ClasslessPredicate calls | semmle.order | 35 | | Foo.qll:3:1:7:1 | Class Foo | Foo.qll:3:19:3:22 | TypeExpr | semmle.label | getASuperType() | | Foo.qll:3:1:7:1 | Class Foo | Foo.qll:3:19:3:22 | TypeExpr | semmle.order | 4 | | Foo.qll:3:1:7:1 | Class Foo | Foo.qll:4:3:4:17 | CharPred Foo | semmle.label | getCharPred() | @@ -207,142 +287,142 @@ edges | Foo.qll:6:23:6:36 | ComparisonFormula | Foo.qll:6:32:6:36 | String | semmle.label | getRightOperand() | | Foo.qll:6:23:6:36 | ComparisonFormula | Foo.qll:6:32:6:36 | String | semmle.order | 15 | | Foo.qll:9:7:11:1 | ClasslessPredicate foo | Foo.qll:9:21:9:25 | f | semmle.label | getParameter(_) | -| Foo.qll:9:7:11:1 | ClasslessPredicate foo | Foo.qll:9:21:9:25 | f | semmle.order | 17 | +| Foo.qll:9:7:11:1 | ClasslessPredicate foo | Foo.qll:9:21:9:25 | f | semmle.order | 18 | | Foo.qll:9:7:11:1 | ClasslessPredicate foo | Foo.qll:10:3:10:85 | ComparisonFormula | semmle.label | getBody() | -| Foo.qll:9:7:11:1 | ClasslessPredicate foo | Foo.qll:10:3:10:85 | ComparisonFormula | semmle.order | 19 | +| Foo.qll:9:7:11:1 | ClasslessPredicate foo | Foo.qll:10:3:10:85 | ComparisonFormula | semmle.order | 20 | | Foo.qll:9:21:9:25 | f | Foo.qll:9:21:9:23 | TypeExpr | semmle.label | getTypeExpr() | -| Foo.qll:9:21:9:25 | f | Foo.qll:9:21:9:23 | TypeExpr | semmle.order | 17 | +| Foo.qll:9:21:9:25 | f | Foo.qll:9:21:9:23 | TypeExpr | semmle.order | 18 | | Foo.qll:10:3:10:85 | ComparisonFormula | Foo.qll:10:3:10:3 | f | semmle.label | getLeftOperand() | -| Foo.qll:10:3:10:85 | ComparisonFormula | Foo.qll:10:3:10:3 | f | semmle.order | 19 | +| Foo.qll:10:3:10:85 | ComparisonFormula | Foo.qll:10:3:10:3 | f | semmle.order | 20 | | Foo.qll:10:3:10:85 | ComparisonFormula | Foo.qll:10:5:10:5 | ComparisonOp | semmle.label | getOperator() | -| Foo.qll:10:3:10:85 | ComparisonFormula | Foo.qll:10:5:10:5 | ComparisonOp | semmle.order | 21 | +| 
Foo.qll:10:3:10:85 | ComparisonFormula | Foo.qll:10:5:10:5 | ComparisonOp | semmle.order | 22 | | Foo.qll:10:3:10:85 | ComparisonFormula | Foo.qll:10:7:10:85 | Rank | semmle.label | getRightOperand() | -| Foo.qll:10:3:10:85 | ComparisonFormula | Foo.qll:10:7:10:85 | Rank | semmle.order | 22 | +| Foo.qll:10:3:10:85 | ComparisonFormula | Foo.qll:10:7:10:85 | Rank | semmle.order | 23 | | Foo.qll:10:7:10:85 | Rank | Foo.qll:10:12:10:12 | Integer | semmle.label | getRankExpr() | -| Foo.qll:10:7:10:85 | Rank | Foo.qll:10:12:10:12 | Integer | semmle.order | 23 | +| Foo.qll:10:7:10:85 | Rank | Foo.qll:10:12:10:12 | Integer | semmle.order | 24 | | Foo.qll:10:7:10:85 | Rank | Foo.qll:10:15:10:23 | inner | semmle.label | getArgument(_) | -| Foo.qll:10:7:10:85 | Rank | Foo.qll:10:15:10:23 | inner | semmle.order | 24 | +| Foo.qll:10:7:10:85 | Rank | Foo.qll:10:15:10:23 | inner | semmle.order | 25 | | Foo.qll:10:7:10:85 | Rank | Foo.qll:10:27:10:50 | ComparisonFormula | semmle.label | getRange() | -| Foo.qll:10:7:10:85 | Rank | Foo.qll:10:27:10:50 | ComparisonFormula | semmle.order | 26 | +| Foo.qll:10:7:10:85 | Rank | Foo.qll:10:27:10:50 | ComparisonFormula | semmle.order | 27 | | Foo.qll:10:7:10:85 | Rank | Foo.qll:10:54:10:58 | inner | semmle.label | getExpr(_) | -| Foo.qll:10:7:10:85 | Rank | Foo.qll:10:54:10:58 | inner | semmle.order | 31 | +| Foo.qll:10:7:10:85 | Rank | Foo.qll:10:54:10:58 | inner | semmle.order | 32 | | Foo.qll:10:7:10:85 | Rank | Foo.qll:10:69:10:84 | MemberCall | semmle.label | getOrderBy(_) | -| Foo.qll:10:7:10:85 | Rank | Foo.qll:10:69:10:84 | MemberCall | semmle.order | 32 | +| Foo.qll:10:7:10:85 | Rank | Foo.qll:10:69:10:84 | MemberCall | semmle.order | 33 | | Foo.qll:10:15:10:23 | inner | Foo.qll:10:15:10:17 | TypeExpr | semmle.label | getTypeExpr() | -| Foo.qll:10:15:10:23 | inner | Foo.qll:10:15:10:17 | TypeExpr | semmle.order | 24 | +| Foo.qll:10:15:10:23 | inner | Foo.qll:10:15:10:17 | TypeExpr | semmle.order | 25 | | Foo.qll:10:27:10:42 | 
MemberCall | Foo.qll:10:27:10:31 | inner | semmle.label | getBase() | -| Foo.qll:10:27:10:42 | MemberCall | Foo.qll:10:27:10:31 | inner | semmle.order | 26 | +| Foo.qll:10:27:10:42 | MemberCall | Foo.qll:10:27:10:31 | inner | semmle.order | 27 | | Foo.qll:10:27:10:50 | ComparisonFormula | Foo.qll:10:27:10:42 | MemberCall | semmle.label | getLeftOperand() | -| Foo.qll:10:27:10:50 | ComparisonFormula | Foo.qll:10:27:10:42 | MemberCall | semmle.order | 26 | +| Foo.qll:10:27:10:50 | ComparisonFormula | Foo.qll:10:27:10:42 | MemberCall | semmle.order | 27 | | Foo.qll:10:27:10:50 | ComparisonFormula | Foo.qll:10:44:10:44 | ComparisonOp | semmle.label | getOperator() | -| Foo.qll:10:27:10:50 | ComparisonFormula | Foo.qll:10:44:10:44 | ComparisonOp | semmle.order | 29 | +| Foo.qll:10:27:10:50 | ComparisonFormula | Foo.qll:10:44:10:44 | ComparisonOp | semmle.order | 30 | | Foo.qll:10:27:10:50 | ComparisonFormula | Foo.qll:10:46:10:50 | String | semmle.label | getRightOperand() | -| Foo.qll:10:27:10:50 | ComparisonFormula | Foo.qll:10:46:10:50 | String | semmle.order | 30 | +| Foo.qll:10:27:10:50 | ComparisonFormula | Foo.qll:10:46:10:50 | String | semmle.order | 31 | | Foo.qll:10:69:10:84 | MemberCall | Foo.qll:10:69:10:73 | inner | semmle.label | getBase() | -| Foo.qll:10:69:10:84 | MemberCall | Foo.qll:10:69:10:73 | inner | semmle.order | 32 | +| Foo.qll:10:69:10:84 | MemberCall | Foo.qll:10:69:10:73 | inner | semmle.order | 33 | | Foo.qll:13:1:27:1 | ClasslessPredicate calls | Foo.qll:13:17:13:21 | f | semmle.label | getParameter(_) | -| Foo.qll:13:1:27:1 | ClasslessPredicate calls | Foo.qll:13:17:13:21 | f | semmle.order | 35 | +| Foo.qll:13:1:27:1 | ClasslessPredicate calls | Foo.qll:13:17:13:21 | f | semmle.order | 36 | | Foo.qll:13:1:27:1 | ClasslessPredicate calls | Foo.qll:14:3:26:14 | Disjunction | semmle.label | getBody() | -| Foo.qll:13:1:27:1 | ClasslessPredicate calls | Foo.qll:14:3:26:14 | Disjunction | semmle.order | 37 | +| Foo.qll:13:1:27:1 | 
ClasslessPredicate calls | Foo.qll:14:3:26:14 | Disjunction | semmle.order | 38 | | Foo.qll:13:17:13:21 | f | Foo.qll:13:17:13:19 | TypeExpr | semmle.label | getTypeExpr() | -| Foo.qll:13:17:13:21 | f | Foo.qll:13:17:13:19 | TypeExpr | semmle.order | 35 | +| Foo.qll:13:17:13:21 | f | Foo.qll:13:17:13:19 | TypeExpr | semmle.order | 36 | | Foo.qll:14:3:14:10 | PredicateCall | Foo.qll:14:9:14:9 | f | semmle.label | getArgument(_) | -| Foo.qll:14:3:14:10 | PredicateCall | Foo.qll:14:9:14:9 | f | semmle.order | 44 | +| Foo.qll:14:3:14:10 | PredicateCall | Foo.qll:14:9:14:9 | f | semmle.order | 45 | | Foo.qll:14:3:16:29 | Disjunction | Foo.qll:14:3:14:10 | PredicateCall | semmle.label | getAnOperand() | -| Foo.qll:14:3:16:29 | Disjunction | Foo.qll:14:3:14:10 | PredicateCall | semmle.order | 37 | +| Foo.qll:14:3:16:29 | Disjunction | Foo.qll:14:3:14:10 | PredicateCall | semmle.order | 38 | | Foo.qll:14:3:16:29 | Disjunction | Foo.qll:16:3:16:29 | ComparisonFormula | semmle.label | getAnOperand() | -| Foo.qll:14:3:16:29 | Disjunction | Foo.qll:16:3:16:29 | ComparisonFormula | semmle.order | 45 | +| Foo.qll:14:3:16:29 | Disjunction | Foo.qll:16:3:16:29 | ComparisonFormula | semmle.order | 46 | | Foo.qll:14:3:18:28 | Disjunction | Foo.qll:14:3:16:29 | Disjunction | semmle.label | getAnOperand() | -| Foo.qll:14:3:18:28 | Disjunction | Foo.qll:14:3:16:29 | Disjunction | semmle.order | 37 | +| Foo.qll:14:3:18:28 | Disjunction | Foo.qll:14:3:16:29 | Disjunction | semmle.order | 38 | | Foo.qll:14:3:18:28 | Disjunction | Foo.qll:18:3:18:28 | ComparisonFormula | semmle.label | getAnOperand() | -| Foo.qll:14:3:18:28 | Disjunction | Foo.qll:18:3:18:28 | ComparisonFormula | semmle.order | 53 | +| Foo.qll:14:3:18:28 | Disjunction | Foo.qll:18:3:18:28 | ComparisonFormula | semmle.order | 54 | | Foo.qll:14:3:20:13 | Disjunction | Foo.qll:14:3:18:28 | Disjunction | semmle.label | getAnOperand() | -| Foo.qll:14:3:20:13 | Disjunction | Foo.qll:14:3:18:28 | Disjunction | semmle.order | 37 | 
+| Foo.qll:14:3:20:13 | Disjunction | Foo.qll:14:3:18:28 | Disjunction | semmle.order | 38 | | Foo.qll:14:3:20:13 | Disjunction | Foo.qll:20:3:20:13 | ComparisonFormula | semmle.label | getAnOperand() | -| Foo.qll:14:3:20:13 | Disjunction | Foo.qll:20:3:20:13 | ComparisonFormula | semmle.order | 60 | +| Foo.qll:14:3:20:13 | Disjunction | Foo.qll:20:3:20:13 | ComparisonFormula | semmle.order | 61 | | Foo.qll:14:3:22:16 | Disjunction | Foo.qll:14:3:20:13 | Disjunction | semmle.label | getAnOperand() | -| Foo.qll:14:3:22:16 | Disjunction | Foo.qll:14:3:20:13 | Disjunction | semmle.order | 37 | +| Foo.qll:14:3:22:16 | Disjunction | Foo.qll:14:3:20:13 | Disjunction | semmle.order | 38 | | Foo.qll:14:3:22:16 | Disjunction | Foo.qll:22:3:22:16 | ComparisonFormula | semmle.label | getAnOperand() | -| Foo.qll:14:3:22:16 | Disjunction | Foo.qll:22:3:22:16 | ComparisonFormula | semmle.order | 66 | +| Foo.qll:14:3:22:16 | Disjunction | Foo.qll:22:3:22:16 | ComparisonFormula | semmle.order | 67 | | Foo.qll:14:3:24:23 | Disjunction | Foo.qll:14:3:22:16 | Disjunction | semmle.label | getAnOperand() | -| Foo.qll:14:3:24:23 | Disjunction | Foo.qll:14:3:22:16 | Disjunction | semmle.order | 37 | +| Foo.qll:14:3:24:23 | Disjunction | Foo.qll:14:3:22:16 | Disjunction | semmle.order | 38 | | Foo.qll:14:3:24:23 | Disjunction | Foo.qll:24:3:24:23 | ComparisonFormula | semmle.label | getAnOperand() | -| Foo.qll:14:3:24:23 | Disjunction | Foo.qll:24:3:24:23 | ComparisonFormula | semmle.order | 72 | +| Foo.qll:14:3:24:23 | Disjunction | Foo.qll:24:3:24:23 | ComparisonFormula | semmle.order | 73 | | Foo.qll:14:3:26:14 | Disjunction | Foo.qll:14:3:24:23 | Disjunction | semmle.label | getAnOperand() | -| Foo.qll:14:3:26:14 | Disjunction | Foo.qll:14:3:24:23 | Disjunction | semmle.order | 37 | +| Foo.qll:14:3:26:14 | Disjunction | Foo.qll:14:3:24:23 | Disjunction | semmle.order | 38 | | Foo.qll:14:3:26:14 | Disjunction | Foo.qll:26:3:26:14 | ComparisonFormula | semmle.label | getAnOperand() | -| 
Foo.qll:14:3:26:14 | Disjunction | Foo.qll:26:3:26:14 | ComparisonFormula | semmle.order | 82 | +| Foo.qll:14:3:26:14 | Disjunction | Foo.qll:26:3:26:14 | ComparisonFormula | semmle.order | 83 | | Foo.qll:16:3:16:29 | ComparisonFormula | Foo.qll:16:3:16:7 | String | semmle.label | getLeftOperand() | -| Foo.qll:16:3:16:29 | ComparisonFormula | Foo.qll:16:3:16:7 | String | semmle.order | 45 | +| Foo.qll:16:3:16:29 | ComparisonFormula | Foo.qll:16:3:16:7 | String | semmle.order | 46 | | Foo.qll:16:3:16:29 | ComparisonFormula | Foo.qll:16:9:16:9 | ComparisonOp | semmle.label | getOperator() | -| Foo.qll:16:3:16:29 | ComparisonFormula | Foo.qll:16:9:16:9 | ComparisonOp | semmle.order | 47 | +| Foo.qll:16:3:16:29 | ComparisonFormula | Foo.qll:16:9:16:9 | ComparisonOp | semmle.order | 48 | | Foo.qll:16:3:16:29 | ComparisonFormula | Foo.qll:16:11:16:29 | MemberCall | semmle.label | getRightOperand() | -| Foo.qll:16:3:16:29 | ComparisonFormula | Foo.qll:16:11:16:29 | MemberCall | semmle.order | 48 | +| Foo.qll:16:3:16:29 | ComparisonFormula | Foo.qll:16:11:16:29 | MemberCall | semmle.order | 49 | | Foo.qll:16:11:16:29 | MemberCall | Foo.qll:16:11:16:11 | f | semmle.label | getBase() | -| Foo.qll:16:11:16:29 | MemberCall | Foo.qll:16:11:16:11 | f | semmle.order | 48 | +| Foo.qll:16:11:16:29 | MemberCall | Foo.qll:16:11:16:11 | f | semmle.order | 49 | | Foo.qll:16:11:16:29 | MemberCall | Foo.qll:16:22:16:22 | Integer | semmle.label | getArgument(_) | -| Foo.qll:16:11:16:29 | MemberCall | Foo.qll:16:22:16:22 | Integer | semmle.order | 50 | +| Foo.qll:16:11:16:29 | MemberCall | Foo.qll:16:22:16:22 | Integer | semmle.order | 51 | | Foo.qll:16:11:16:29 | MemberCall | Foo.qll:16:25:16:25 | Integer | semmle.label | getArgument(_) | -| Foo.qll:16:11:16:29 | MemberCall | Foo.qll:16:25:16:25 | Integer | semmle.order | 51 | +| Foo.qll:16:11:16:29 | MemberCall | Foo.qll:16:25:16:25 | Integer | semmle.order | 52 | | Foo.qll:16:11:16:29 | MemberCall | Foo.qll:16:28:16:28 | Integer | 
semmle.label | getArgument(_) | -| Foo.qll:16:11:16:29 | MemberCall | Foo.qll:16:28:16:28 | Integer | semmle.order | 52 | +| Foo.qll:16:11:16:29 | MemberCall | Foo.qll:16:28:16:28 | Integer | semmle.order | 53 | | Foo.qll:18:3:18:9 | InlineCast | Foo.qll:18:3:18:3 | f | semmle.label | getBase() | -| Foo.qll:18:3:18:9 | InlineCast | Foo.qll:18:3:18:3 | f | semmle.order | 53 | +| Foo.qll:18:3:18:9 | InlineCast | Foo.qll:18:3:18:3 | f | semmle.order | 54 | | Foo.qll:18:3:18:9 | InlineCast | Foo.qll:18:6:18:8 | TypeExpr | semmle.label | getTypeExpr() | -| Foo.qll:18:3:18:9 | InlineCast | Foo.qll:18:6:18:8 | TypeExpr | semmle.order | 57 | +| Foo.qll:18:3:18:9 | InlineCast | Foo.qll:18:6:18:8 | TypeExpr | semmle.order | 58 | | Foo.qll:18:3:18:20 | MemberCall | Foo.qll:18:3:18:9 | InlineCast | semmle.label | getBase() | -| Foo.qll:18:3:18:20 | MemberCall | Foo.qll:18:3:18:9 | InlineCast | semmle.order | 53 | +| Foo.qll:18:3:18:20 | MemberCall | Foo.qll:18:3:18:9 | InlineCast | semmle.order | 54 | | Foo.qll:18:3:18:28 | ComparisonFormula | Foo.qll:18:3:18:20 | MemberCall | semmle.label | getLeftOperand() | -| Foo.qll:18:3:18:28 | ComparisonFormula | Foo.qll:18:3:18:20 | MemberCall | semmle.order | 53 | +| Foo.qll:18:3:18:28 | ComparisonFormula | Foo.qll:18:3:18:20 | MemberCall | semmle.order | 54 | | Foo.qll:18:3:18:28 | ComparisonFormula | Foo.qll:18:22:18:22 | ComparisonOp | semmle.label | getOperator() | -| Foo.qll:18:3:18:28 | ComparisonFormula | Foo.qll:18:22:18:22 | ComparisonOp | semmle.order | 58 | +| Foo.qll:18:3:18:28 | ComparisonFormula | Foo.qll:18:22:18:22 | ComparisonOp | semmle.order | 59 | | Foo.qll:18:3:18:28 | ComparisonFormula | Foo.qll:18:24:18:28 | String | semmle.label | getRightOperand() | -| Foo.qll:18:3:18:28 | ComparisonFormula | Foo.qll:18:24:18:28 | String | semmle.order | 59 | +| Foo.qll:18:3:18:28 | ComparisonFormula | Foo.qll:18:24:18:28 | String | semmle.order | 60 | | Foo.qll:20:3:20:9 | InlineCast | Foo.qll:20:3:20:3 | f | semmle.label | 
getBase() | -| Foo.qll:20:3:20:9 | InlineCast | Foo.qll:20:3:20:3 | f | semmle.order | 60 | +| Foo.qll:20:3:20:9 | InlineCast | Foo.qll:20:3:20:3 | f | semmle.order | 61 | | Foo.qll:20:3:20:9 | InlineCast | Foo.qll:20:6:20:8 | TypeExpr | semmle.label | getTypeExpr() | -| Foo.qll:20:3:20:9 | InlineCast | Foo.qll:20:6:20:8 | TypeExpr | semmle.order | 63 | +| Foo.qll:20:3:20:9 | InlineCast | Foo.qll:20:6:20:8 | TypeExpr | semmle.order | 64 | | Foo.qll:20:3:20:13 | ComparisonFormula | Foo.qll:20:3:20:9 | InlineCast | semmle.label | getLeftOperand() | -| Foo.qll:20:3:20:13 | ComparisonFormula | Foo.qll:20:3:20:9 | InlineCast | semmle.order | 60 | +| Foo.qll:20:3:20:13 | ComparisonFormula | Foo.qll:20:3:20:9 | InlineCast | semmle.order | 61 | | Foo.qll:20:3:20:13 | ComparisonFormula | Foo.qll:20:11:20:11 | ComparisonOp | semmle.label | getOperator() | -| Foo.qll:20:3:20:13 | ComparisonFormula | Foo.qll:20:11:20:11 | ComparisonOp | semmle.order | 64 | +| Foo.qll:20:3:20:13 | ComparisonFormula | Foo.qll:20:11:20:11 | ComparisonOp | semmle.order | 65 | | Foo.qll:20:3:20:13 | ComparisonFormula | Foo.qll:20:13:20:13 | f | semmle.label | getRightOperand() | -| Foo.qll:20:3:20:13 | ComparisonFormula | Foo.qll:20:13:20:13 | f | semmle.order | 65 | +| Foo.qll:20:3:20:13 | ComparisonFormula | Foo.qll:20:13:20:13 | f | semmle.order | 66 | | Foo.qll:22:3:22:16 | ComparisonFormula | Foo.qll:22:3:22:3 | f | semmle.label | getLeftOperand() | -| Foo.qll:22:3:22:16 | ComparisonFormula | Foo.qll:22:3:22:3 | f | semmle.order | 66 | +| Foo.qll:22:3:22:16 | ComparisonFormula | Foo.qll:22:3:22:3 | f | semmle.order | 67 | | Foo.qll:22:3:22:16 | ComparisonFormula | Foo.qll:22:5:22:5 | ComparisonOp | semmle.label | getOperator() | -| Foo.qll:22:3:22:16 | ComparisonFormula | Foo.qll:22:5:22:5 | ComparisonOp | semmle.order | 68 | +| Foo.qll:22:3:22:16 | ComparisonFormula | Foo.qll:22:5:22:5 | ComparisonOp | semmle.order | 69 | | Foo.qll:22:3:22:16 | ComparisonFormula | Foo.qll:22:7:22:16 | 
FullAggregate[any] | semmle.label | getRightOperand() | -| Foo.qll:22:3:22:16 | ComparisonFormula | Foo.qll:22:7:22:16 | FullAggregate[any] | semmle.order | 69 | +| Foo.qll:22:3:22:16 | ComparisonFormula | Foo.qll:22:7:22:16 | FullAggregate[any] | semmle.order | 70 | | Foo.qll:22:7:22:16 | FullAggregate[any] | Foo.qll:22:11:22:15 | f | semmle.label | getArgument(_) | -| Foo.qll:22:7:22:16 | FullAggregate[any] | Foo.qll:22:11:22:15 | f | semmle.order | 70 | +| Foo.qll:22:7:22:16 | FullAggregate[any] | Foo.qll:22:11:22:15 | f | semmle.order | 71 | | Foo.qll:22:11:22:15 | f | Foo.qll:22:11:22:13 | TypeExpr | semmle.label | getTypeExpr() | -| Foo.qll:22:11:22:15 | f | Foo.qll:22:11:22:13 | TypeExpr | semmle.order | 70 | +| Foo.qll:22:11:22:15 | f | Foo.qll:22:11:22:13 | TypeExpr | semmle.order | 71 | | Foo.qll:24:3:24:23 | ComparisonFormula | Foo.qll:24:3:24:3 | Integer | semmle.label | getLeftOperand() | -| Foo.qll:24:3:24:23 | ComparisonFormula | Foo.qll:24:3:24:3 | Integer | semmle.order | 72 | +| Foo.qll:24:3:24:23 | ComparisonFormula | Foo.qll:24:3:24:3 | Integer | semmle.order | 73 | | Foo.qll:24:3:24:23 | ComparisonFormula | Foo.qll:24:5:24:5 | ComparisonOp | semmle.label | getOperator() | -| Foo.qll:24:3:24:23 | ComparisonFormula | Foo.qll:24:5:24:5 | ComparisonOp | semmle.order | 74 | +| Foo.qll:24:3:24:23 | ComparisonFormula | Foo.qll:24:5:24:5 | ComparisonOp | semmle.order | 75 | | Foo.qll:24:3:24:23 | ComparisonFormula | Foo.qll:24:7:24:23 | AddExpr | semmle.label | getRightOperand() | -| Foo.qll:24:3:24:23 | ComparisonFormula | Foo.qll:24:7:24:23 | AddExpr | semmle.order | 75 | +| Foo.qll:24:3:24:23 | ComparisonFormula | Foo.qll:24:7:24:23 | AddExpr | semmle.order | 76 | | Foo.qll:24:7:24:23 | AddExpr | Foo.qll:24:7:24:7 | Integer | semmle.label | getLeftOperand() | -| Foo.qll:24:7:24:23 | AddExpr | Foo.qll:24:7:24:7 | Integer | semmle.order | 75 | +| Foo.qll:24:7:24:23 | AddExpr | Foo.qll:24:7:24:7 | Integer | semmle.order | 76 | | Foo.qll:24:7:24:23 | 
AddExpr | Foo.qll:24:12:24:22 | AddExpr | semmle.label | getRightOperand() | -| Foo.qll:24:7:24:23 | AddExpr | Foo.qll:24:12:24:22 | AddExpr | semmle.order | 77 | +| Foo.qll:24:7:24:23 | AddExpr | Foo.qll:24:12:24:22 | AddExpr | semmle.order | 78 | | Foo.qll:24:12:24:22 | AddExpr | Foo.qll:24:12:24:12 | Integer | semmle.label | getLeftOperand() | -| Foo.qll:24:12:24:22 | AddExpr | Foo.qll:24:12:24:12 | Integer | semmle.order | 77 | +| Foo.qll:24:12:24:22 | AddExpr | Foo.qll:24:12:24:12 | Integer | semmle.order | 78 | | Foo.qll:24:12:24:22 | AddExpr | Foo.qll:24:17:24:21 | AddExpr | semmle.label | getRightOperand() | -| Foo.qll:24:12:24:22 | AddExpr | Foo.qll:24:17:24:21 | AddExpr | semmle.order | 79 | +| Foo.qll:24:12:24:22 | AddExpr | Foo.qll:24:17:24:21 | AddExpr | semmle.order | 80 | | Foo.qll:24:17:24:21 | AddExpr | Foo.qll:24:17:24:17 | Integer | semmle.label | getLeftOperand() | -| Foo.qll:24:17:24:21 | AddExpr | Foo.qll:24:17:24:17 | Integer | semmle.order | 79 | +| Foo.qll:24:17:24:21 | AddExpr | Foo.qll:24:17:24:17 | Integer | semmle.order | 80 | | Foo.qll:24:17:24:21 | AddExpr | Foo.qll:24:21:24:21 | Integer | semmle.label | getRightOperand() | -| Foo.qll:24:17:24:21 | AddExpr | Foo.qll:24:21:24:21 | Integer | semmle.order | 81 | +| Foo.qll:24:17:24:21 | AddExpr | Foo.qll:24:21:24:21 | Integer | semmle.order | 82 | | Foo.qll:26:3:26:14 | ComparisonFormula | Foo.qll:26:3:26:6 | Boolean | semmle.label | getLeftOperand() | -| Foo.qll:26:3:26:14 | ComparisonFormula | Foo.qll:26:3:26:6 | Boolean | semmle.order | 82 | +| Foo.qll:26:3:26:14 | ComparisonFormula | Foo.qll:26:3:26:6 | Boolean | semmle.order | 83 | | Foo.qll:26:3:26:14 | ComparisonFormula | Foo.qll:26:8:26:8 | ComparisonOp | semmle.label | getOperator() | -| Foo.qll:26:3:26:14 | ComparisonFormula | Foo.qll:26:8:26:8 | ComparisonOp | semmle.order | 84 | +| Foo.qll:26:3:26:14 | ComparisonFormula | Foo.qll:26:8:26:8 | ComparisonOp | semmle.order | 85 | | Foo.qll:26:3:26:14 | ComparisonFormula | 
Foo.qll:26:10:26:14 | Boolean | semmle.label | getRightOperand() | -| Foo.qll:26:3:26:14 | ComparisonFormula | Foo.qll:26:10:26:14 | Boolean | semmle.order | 85 | +| Foo.qll:26:3:26:14 | ComparisonFormula | Foo.qll:26:10:26:14 | Boolean | semmle.order | 86 | | printAst.ql:1:1:1:29 | TopLevel | printAst.ql:1:1:1:28 | Import | semmle.label | getAnImport() | -| printAst.ql:1:1:1:29 | TopLevel | printAst.ql:1:1:1:28 | Import | semmle.order | 86 | +| printAst.ql:1:1:1:29 | TopLevel | printAst.ql:1:1:1:28 | Import | semmle.order | 87 | graphProperties | semmle.graphKind | tree | diff --git a/ql/test/queries/style/ImplicitThis/Bad.qll b/ql/test/queries/style/ImplicitThis/Bad.qll new file mode 100644 index 00000000000..97b51284acc --- /dev/null +++ b/ql/test/queries/style/ImplicitThis/Bad.qll @@ -0,0 +1,11 @@ +import ql + +class Foo extends string { + Foo() { this = "hello" } + + string getBar() { result = "bar" } + + string getBarWithThis() { result = this.getBar() } + + string getBarWithoutThis() { result = getBar() } +} diff --git a/ql/test/queries/style/ImplicitThis/Good.qll b/ql/test/queries/style/ImplicitThis/Good.qll new file mode 100644 index 00000000000..35b8023b572 --- /dev/null +++ b/ql/test/queries/style/ImplicitThis/Good.qll @@ -0,0 +1,21 @@ +import ql + +class Foo extends string { + Foo() { this = "hello" } + + string getBar() { result = "bar" } + + string getBarWithThis() { result = this.getBar() } + + /* Okay because not a member predicate. */ + string getBaz() { result = Baz::baz() } + + /* Okay because not a member predicate. 
*/ + string getOuterQuux() { result = getQuux() } +} + +string getQuux() { result = "quux" } + +module Baz { + string baz() { result = "baz" } +} diff --git a/ql/test/queries/style/ImplicitThis/ImplicitThis.expected b/ql/test/queries/style/ImplicitThis/ImplicitThis.expected new file mode 100644 index 00000000000..fa3adbaf992 --- /dev/null +++ b/ql/test/queries/style/ImplicitThis/ImplicitThis.expected @@ -0,0 +1 @@ +| Bad.qll:10:41:10:48 | PredicateCall | Use of implicit `this`. | diff --git a/ql/test/queries/style/ImplicitThis/ImplicitThis.qlref b/ql/test/queries/style/ImplicitThis/ImplicitThis.qlref new file mode 100644 index 00000000000..0bdcd3b4b5b --- /dev/null +++ b/ql/test/queries/style/ImplicitThis/ImplicitThis.qlref @@ -0,0 +1 @@ +queries/style/ImplicitThis.ql diff --git a/ql/test/queries/style/ImplicitThis/Okay.qll b/ql/test/queries/style/ImplicitThis/Okay.qll new file mode 100644 index 00000000000..37c9dd4ab2a --- /dev/null +++ b/ql/test/queries/style/ImplicitThis/Okay.qll @@ -0,0 +1,13 @@ +import ql + +class Foo extends string { + Foo() { this = "hello" } + + string getBar() { result = "bar" } + + /* Okay, because we don't write `this.some_method` anywhere */ + string getBarWithoutThis() { result = getBar() } + + /* Okay, because this is the only way to cast `this`. 
*/ + string useThisWithInlineCast() { result = this.(string).toUpperCase() } +} diff --git a/repo-tests/codeql-go.txt b/repo-tests/codeql-go.txt new file mode 100644 index 00000000000..fe33dcd0495 --- /dev/null +++ b/repo-tests/codeql-go.txt @@ -0,0 +1 @@ +abe3f2148b92b1a94a0a3676cb4dab7d9211076f diff --git a/repo-tests/codeql-go/ql/config/legacy-support/qlpack.yml b/repo-tests/codeql-go/ql/config/legacy-support/qlpack.yml new file mode 100644 index 00000000000..cff1d02b271 --- /dev/null +++ b/repo-tests/codeql-go/ql/config/legacy-support/qlpack.yml @@ -0,0 +1,3 @@ +name: legacy-libraries-go +version: 0.0.0 +libraryPathDependencies: codeql-go diff --git a/repo-tests/codeql-go/ql/examples/qlpack.yml b/repo-tests/codeql-go/ql/examples/qlpack.yml new file mode 100644 index 00000000000..39eb537e673 --- /dev/null +++ b/repo-tests/codeql-go/ql/examples/qlpack.yml @@ -0,0 +1,4 @@ +name: codeql/go-examples +version: 0.0.2 +dependencies: + codeql/go-all: ^0.0.2 diff --git a/repo-tests/codeql-go/ql/examples/snippets/calltobuiltin.ql b/repo-tests/codeql-go/ql/examples/snippets/calltobuiltin.ql new file mode 100644 index 00000000000..5ff20e563be --- /dev/null +++ b/repo-tests/codeql-go/ql/examples/snippets/calltobuiltin.ql @@ -0,0 +1,15 @@ +/** + * @name Call to built-in function + * @description Finds calls to the built-in `len` function. + * @id go/examples/calltolen + * @tags call + * function + * len + * built-in + */ + +import go + +from DataFlow::CallNode call +where call = Builtin::len().getACall() +select call diff --git a/repo-tests/codeql-go/ql/examples/snippets/calltofunction.ql b/repo-tests/codeql-go/ql/examples/snippets/calltofunction.ql new file mode 100644 index 00000000000..5bbe28b52a6 --- /dev/null +++ b/repo-tests/codeql-go/ql/examples/snippets/calltofunction.ql @@ -0,0 +1,16 @@ +/** + * @name Call to library function + * @description Finds calls to "fmt.Println". 
+ * @id go/examples/calltoprintln + * @tags call + * function + * println + */ + +import go + +from Function println, DataFlow::CallNode call +where + println.hasQualifiedName("fmt", "Println") and + call = println.getACall() +select call diff --git a/repo-tests/codeql-go/ql/examples/snippets/calltomethod.ql b/repo-tests/codeql-go/ql/examples/snippets/calltomethod.ql new file mode 100644 index 00000000000..eebfc707f2b --- /dev/null +++ b/repo-tests/codeql-go/ql/examples/snippets/calltomethod.ql @@ -0,0 +1,18 @@ +/** + * @name Call to method + * @description Finds calls to the `Get` method of type `Header` from the `net/http` package. + * @id go/examples/calltoheaderget + * @tags call + * function + * net/http + * Header + * strings + */ + +import go + +from Method get, DataFlow::CallNode call +where + get.hasQualifiedName("net/http", "Header", "Get") and + call = get.getACall() +select call diff --git a/repo-tests/codeql-go/ql/examples/snippets/constant.ql b/repo-tests/codeql-go/ql/examples/snippets/constant.ql new file mode 100644 index 00000000000..3627efb6722 --- /dev/null +++ b/repo-tests/codeql-go/ql/examples/snippets/constant.ql @@ -0,0 +1,14 @@ +/** + * @name Compile-time constant + * @description Finds compile-time constants with value zero. 
+ * @id go/examples/zeroconstant + * @tags expression + * numeric value + * constant + */ + +import go + +from DataFlow::Node zero +where zero.getNumericValue() = 0 +select zero diff --git a/repo-tests/codeql-go/ql/examples/snippets/emptythen.ql b/repo-tests/codeql-go/ql/examples/snippets/emptythen.ql new file mode 100644 index 00000000000..97a810e388c --- /dev/null +++ b/repo-tests/codeql-go/ql/examples/snippets/emptythen.ql @@ -0,0 +1,18 @@ +/** + * @name If statements with empty then branch + * @description Finds 'if' statements where the 'then' branch is + * an empty block statement + * @id go/examples/emptythen + * @tags if + * then + * empty + * conditional + * branch + * statement + */ + +import go + +from IfStmt i +where i.getThen().getNumStmt() = 0 +select i diff --git a/repo-tests/codeql-go/ql/examples/snippets/fieldread.ql b/repo-tests/codeql-go/ql/examples/snippets/fieldread.ql new file mode 100644 index 00000000000..aef9bc2f4d9 --- /dev/null +++ b/repo-tests/codeql-go/ql/examples/snippets/fieldread.ql @@ -0,0 +1,15 @@ +/** + * @name Field read + * @description Finds code that reads `Request.Method`. + * @id go/examples/readofrequestmethod + * @tags field + * read + */ + +import go + +from Field reqm, Read read +where + reqm.hasQualifiedName("net/http", "Request", "Method") and + read = reqm.getARead() +select read diff --git a/repo-tests/codeql-go/ql/examples/snippets/fieldwrite.ql b/repo-tests/codeql-go/ql/examples/snippets/fieldwrite.ql new file mode 100644 index 00000000000..b9374f4ef24 --- /dev/null +++ b/repo-tests/codeql-go/ql/examples/snippets/fieldwrite.ql @@ -0,0 +1,15 @@ +/** + * @name Field write + * @description Finds assignments to field `Status` of type `Response` from package `net/http`. 
+ * @id go/examples/responsestatus + * @tags net/http + * field write + */ + +import go + +from Field status, Write write +where + status.hasQualifiedName("net/http", "Response", "Status") and + write = status.getAWrite() +select write, write.getRhs() diff --git a/repo-tests/codeql-go/ql/examples/snippets/function.ql b/repo-tests/codeql-go/ql/examples/snippets/function.ql new file mode 100644 index 00000000000..559dd34c234 --- /dev/null +++ b/repo-tests/codeql-go/ql/examples/snippets/function.ql @@ -0,0 +1,13 @@ +/** + * @name Function + * @description Finds functions called "main". + * @id go/examples/mainfunction + * @tags function + * main + */ + +import go + +from Function main +where main.getName() = "main" +select main diff --git a/repo-tests/codeql-go/ql/examples/snippets/nilcheck.ql b/repo-tests/codeql-go/ql/examples/snippets/nilcheck.ql new file mode 100644 index 00000000000..cb82f14618e --- /dev/null +++ b/repo-tests/codeql-go/ql/examples/snippets/nilcheck.ql @@ -0,0 +1,15 @@ +/** + * @name Comparison with nil + * @description Finds comparisons with nil. + * @id go/examples/nilcheck + * @tags comparison + * nil + */ + +import go + +from DataFlow::EqualityTestNode eq, DataFlow::Node nd, DataFlow::Node nil +where + nil = Builtin::nil().getARead() and + eq.eq(_, nd, nil) +select eq diff --git a/repo-tests/codeql-go/ql/examples/snippets/param.ql b/repo-tests/codeql-go/ql/examples/snippets/param.ql new file mode 100644 index 00000000000..4340e018f9a --- /dev/null +++ b/repo-tests/codeql-go/ql/examples/snippets/param.ql @@ -0,0 +1,12 @@ +/** + * @name Parameter + * @description Finds parameters of type "ResponseWriter" from package "net/http". 
+ * @id go/examples/responseparam + * @tags parameter + */ + +import go + +from Parameter req +where req.getType().hasQualifiedName("net/http", "ResponseWriter") +select req diff --git a/repo-tests/codeql-go/ql/examples/snippets/pointertype.ql b/repo-tests/codeql-go/ql/examples/snippets/pointertype.ql new file mode 100644 index 00000000000..b21f03f243e --- /dev/null +++ b/repo-tests/codeql-go/ql/examples/snippets/pointertype.ql @@ -0,0 +1,15 @@ +/** + * @name Type + * @description Finds pointer type `*Request` from package `net/http`. + * @id go/examples/requestptrtype + * @tags net/http + * type + */ + +import go + +from Type reqtp, PointerType reqptrtp +where + reqtp.hasQualifiedName("net/http", "Request") and + reqptrtp.getBaseType() = reqtp +select reqptrtp diff --git a/repo-tests/codeql-go/ql/examples/snippets/receiver.ql b/repo-tests/codeql-go/ql/examples/snippets/receiver.ql new file mode 100644 index 00000000000..511ef1314f5 --- /dev/null +++ b/repo-tests/codeql-go/ql/examples/snippets/receiver.ql @@ -0,0 +1,12 @@ +/** + * @name Receiver variable + * @description Finds receiver variables of pointer type. + * @id go/examples/pointerreceiver + * @tags receiver variable + */ + +import go + +from ReceiverVariable recv +where recv.getType() instanceof PointerType +select recv diff --git a/repo-tests/codeql-go/ql/examples/snippets/result.ql b/repo-tests/codeql-go/ql/examples/snippets/result.ql new file mode 100644 index 00000000000..1241142bd4a --- /dev/null +++ b/repo-tests/codeql-go/ql/examples/snippets/result.ql @@ -0,0 +1,12 @@ +/** + * @name Result variable + * @description Finds result variables of type "error". 
+ * @id go/examples/errresult + * @tags result variable + */ + +import go + +from ResultVariable err +where err.getType() = Builtin::error().getType() +select err diff --git a/repo-tests/codeql-go/ql/examples/snippets/type.ql b/repo-tests/codeql-go/ql/examples/snippets/type.ql new file mode 100644 index 00000000000..60861c72bad --- /dev/null +++ b/repo-tests/codeql-go/ql/examples/snippets/type.ql @@ -0,0 +1,13 @@ +/** + * @name Type + * @description Finds type `Request` from package `net/http`. + * @id go/examples/requesttype + * @tags net/http + * type + */ + +import go + +from Type request +where request.hasQualifiedName("net/http", "Request") +select request diff --git a/repo-tests/codeql-go/ql/examples/snippets/typeinfo.ql b/repo-tests/codeql-go/ql/examples/snippets/typeinfo.ql new file mode 100644 index 00000000000..b5c22943466 --- /dev/null +++ b/repo-tests/codeql-go/ql/examples/snippets/typeinfo.ql @@ -0,0 +1,16 @@ +/** + * @name Type information + * @description Finds code elements of type `*Request` from package `net/http`. 
+ * @id go/examples/requests + * @tags net/http + * types + */ + +import go + +from Type reqtp, PointerType reqptrtp, DataFlow::Node req +where + reqtp.hasQualifiedName("net/http", "Request") and + reqptrtp.getBaseType() = reqtp and + req.getType() = reqptrtp +select req diff --git a/repo-tests/codeql-go/ql/examples/snippets/updateinloop.ql b/repo-tests/codeql-go/ql/examples/snippets/updateinloop.ql new file mode 100644 index 00000000000..f047f16dae2 --- /dev/null +++ b/repo-tests/codeql-go/ql/examples/snippets/updateinloop.ql @@ -0,0 +1,13 @@ +/** + * @name Increment statements in loops + * @description Finds increment statements that are nested in a loop + * @id go/examples/updateinloop + * @tags nesting + * increment + */ + +import go + +from IncStmt s, LoopStmt l +where s.getParent+() = l +select s, l diff --git a/repo-tests/codeql-go/ql/examples/snippets/variable.ql b/repo-tests/codeql-go/ql/examples/snippets/variable.ql new file mode 100644 index 00000000000..28bb4934544 --- /dev/null +++ b/repo-tests/codeql-go/ql/examples/snippets/variable.ql @@ -0,0 +1,13 @@ +/** + * @name Variable + * @description Finds variables called "err". + * @id go/examples/errvariable + * @tags variable + * err + */ + +import go + +from Variable err +where err.getName() = "err" +select err, err.getDeclaration() diff --git a/repo-tests/codeql-go/ql/examples/snippets/varread.ql b/repo-tests/codeql-go/ql/examples/snippets/varread.ql new file mode 100644 index 00000000000..87282146d41 --- /dev/null +++ b/repo-tests/codeql-go/ql/examples/snippets/varread.ql @@ -0,0 +1,14 @@ +/** + * @name Variable read + * @description Finds code that reads a variable called `err`. 
+ * @id go/examples/readoferr + * @tags variable read + */ + +import go + +from Variable err, Read read +where + err.getName() = "err" and + read = err.getARead() +select read diff --git a/repo-tests/codeql-go/ql/examples/snippets/varwrite.ql b/repo-tests/codeql-go/ql/examples/snippets/varwrite.ql new file mode 100644 index 00000000000..189e915c167 --- /dev/null +++ b/repo-tests/codeql-go/ql/examples/snippets/varwrite.ql @@ -0,0 +1,14 @@ +/** + * @name Variable write + * @description Finds assignments to variables named "err". + * @id go/examples/errwrite + * @tags variable write + */ + +import go + +from Variable err, Write write +where + err.getName() = "err" and + write = err.getAWrite() +select write, write.getRhs() diff --git a/repo-tests/codeql-go/ql/examples/snippets/zerocheck.ql b/repo-tests/codeql-go/ql/examples/snippets/zerocheck.ql new file mode 100644 index 00000000000..467dae7122d --- /dev/null +++ b/repo-tests/codeql-go/ql/examples/snippets/zerocheck.ql @@ -0,0 +1,16 @@ +/** + * @name Comparison with zero + * @description Finds comparisons between an unsigned value and zero. + * @id go/examples/unsignedgez + * @tags comparison + * unsigned + */ + +import go + +from DataFlow::RelationalComparisonNode cmp, DataFlow::Node unsigned, DataFlow::Node zero +where + zero.getNumericValue() = 0 and + unsigned.getType().getUnderlyingType() instanceof UnsignedIntegerType and + cmp.leq(_, zero, unsigned, 0) +select cmp, unsigned diff --git a/repo-tests/codeql-go/ql/lib/Customizations.qll b/repo-tests/codeql-go/ql/lib/Customizations.qll new file mode 100644 index 00000000000..127840de9dd --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/Customizations.qll @@ -0,0 +1,12 @@ +/** + * Contains customizations to the standard library. + * + * This module is imported by `go.qll`, so any customizations defined here automatically + * apply to all queries. 
+ * + * Typical examples of customizations include adding new subclasses of abstract classes such as + * `FileSystemAccess`, or the `Source` and `Sink` classes associated with the security queries + * to model frameworks that are not covered by the standard library. + */ + +import go diff --git a/repo-tests/codeql-go/ql/lib/definitions.ql b/repo-tests/codeql-go/ql/lib/definitions.ql new file mode 100644 index 00000000000..46e28d9fe71 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/definitions.ql @@ -0,0 +1,15 @@ +/** + * @name Jump-to-definition links + * @description Generates use-definition pairs that provide the data + * for jump-to-definition in the code viewer. + * @kind definitions + * @id go/jump-to-definition + */ + +import go + +from Ident def, Ident use, Entity e +where + use.uses(e) and + def.declares(e) +select use, def, "V" diff --git a/repo-tests/codeql-go/ql/lib/go.dbscheme b/repo-tests/codeql-go/ql/lib/go.dbscheme new file mode 100644 index 00000000000..2842941c6f9 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/go.dbscheme @@ -0,0 +1,528 @@ +/** Auto-generated dbscheme; do not edit. 
*/ + + +/** Duplicate code **/ + +duplicateCode( + unique int id : @duplication, + varchar(900) relativePath : string ref, + int equivClass : int ref); + +similarCode( + unique int id : @similarity, + varchar(900) relativePath : string ref, + int equivClass : int ref); + +@duplication_or_similarity = @duplication | @similarity; + +tokens( + int id : @duplication_or_similarity ref, + int offset : int ref, + int beginLine : int ref, + int beginColumn : int ref, + int endLine : int ref, + int endColumn : int ref); + +/** External data **/ + +externalData( + int id : @externalDataElement, + varchar(900) path : string ref, + int column: int ref, + varchar(900) value : string ref +); + +snapshotDate(unique date snapshotDate : date ref); + +sourceLocationPrefix(varchar(900) prefix : string ref); + + +/* + * XML Files + */ + +xmlEncoding( + unique int id: @file ref, + string encoding: string ref +); + +xmlDTDs( + unique int id: @xmldtd, + string root: string ref, + string publicId: string ref, + string systemId: string ref, + int fileid: @file ref +); + +xmlElements( + unique int id: @xmlelement, + string name: string ref, + int parentid: @xmlparent ref, + int idx: int ref, + int fileid: @file ref +); + +xmlAttrs( + unique int id: @xmlattribute, + int elementid: @xmlelement ref, + string name: string ref, + string value: string ref, + int idx: int ref, + int fileid: @file ref +); + +xmlNs( + int id: @xmlnamespace, + string prefixName: string ref, + string URI: string ref, + int fileid: @file ref +); + +xmlHasNs( + int elementId: @xmlnamespaceable ref, + int nsId: @xmlnamespace ref, + int fileid: @file ref +); + +xmlComments( + unique int id: @xmlcomment, + string text: string ref, + int parentid: @xmlparent ref, + int fileid: @file ref +); + +xmlChars( + unique int id: @xmlcharacters, + string text: string ref, + int parentid: @xmlparent ref, + int idx: int ref, + int isCDATA: int ref, + int fileid: @file ref +); + +@xmlparent = @file | @xmlelement; +@xmlnamespaceable = 
@xmlelement | @xmlattribute; + +xmllocations( + int xmlElement: @xmllocatable ref, + int location: @location_default ref +); + +@xmllocatable = @xmlcharacters | @xmlelement | @xmlcomment | @xmlattribute | @xmldtd | @file | @xmlnamespace; + +compilations(unique int id: @compilation, string cwd: string ref); + +#keyset[id, num] +compilation_args(int id: @compilation ref, int num: int ref, string arg: string ref); + +#keyset[id, num, kind] +compilation_time(int id: @compilation ref, int num: int ref, int kind: int ref, float secs: float ref); + +diagnostic_for(unique int diagnostic: @diagnostic ref, int compilation: @compilation ref, int file_number: int ref, int file_number_diagnostic_number: int ref); + +compilation_finished(unique int id: @compilation ref, float cpu_seconds: float ref, float elapsed_seconds: float ref); + +#keyset[id, num] +compilation_compiling_files(int id: @compilation ref, int num: int ref, int file: @file ref); + +diagnostics(unique int id: @diagnostic, int severity: int ref, string error_tag: string ref, string error_message: string ref, + string full_error_message: string ref, int location: @location ref); + +locations_default(unique int id: @location_default, int file: @file ref, int beginLine: int ref, int beginColumn: int ref, + int endLine: int ref, int endColumn: int ref); + +numlines(int element_id: @sourceline ref, int num_lines: int ref, int num_code: int ref, int num_comment: int ref); + +files(unique int id: @file, string name: string ref); + +folders(unique int id: @folder, string name: string ref); + +containerparent(int parent: @container ref, unique int child: @container ref); + +has_location(unique int locatable: @locatable ref, int location: @location ref); + +#keyset[parent, idx] +comment_groups(unique int id: @comment_group, int parent: @file ref, int idx: int ref); + +comments(unique int id: @comment, int kind: int ref, int parent: @comment_group ref, int idx: int ref, string text: string ref); + +doc_comments(unique int 
node: @documentable ref, int comment: @comment_group ref); + +#keyset[parent, idx] +exprs(unique int id: @expr, int kind: int ref, int parent: @exprparent ref, int idx: int ref); + +literals(unique int expr: @expr ref, string value: string ref, string raw: string ref); + +constvalues(unique int expr: @expr ref, string value: string ref, string exact: string ref); + +fields(unique int id: @field, int parent: @fieldparent ref, int idx: int ref); + +#keyset[parent, idx] +stmts(unique int id: @stmt, int kind: int ref, int parent: @stmtparent ref, int idx: int ref); + +#keyset[parent, idx] +decls(unique int id: @decl, int kind: int ref, int parent: @declparent ref, int idx: int ref); + +#keyset[parent, idx] +specs(unique int id: @spec, int kind: int ref, int parent: @gendecl ref, int idx: int ref); + +scopes(unique int id: @scope, int kind: int ref); + +scopenesting(unique int inner: @scope ref, int outer: @scope ref); + +scopenodes(unique int node: @scopenode ref, int scope: @localscope ref); + +objects(unique int id: @object, int kind: int ref, string name: string ref); + +objectscopes(unique int object: @object ref, int scope: @scope ref); + +objecttypes(unique int object: @object ref, int tp: @type ref); + +methodreceivers(unique int method: @object ref, int receiver: @object ref); + +fieldstructs(unique int field: @object ref, int struct: @structtype ref); + +methodhosts(int method: @object ref, int host: @namedtype ref); + +defs(int ident: @ident ref, int object: @object ref); + +uses(int ident: @ident ref, int object: @object ref); + +types(unique int id: @type, int kind: int ref); + +type_of(unique int expr: @expr ref, int tp: @type ref); + +typename(unique int tp: @type ref, string name: string ref); + +key_type(unique int map: @maptype ref, int tp: @type ref); + +element_type(unique int container: @containertype ref, int tp: @type ref); + +base_type(unique int ptr: @pointertype ref, int tp: @type ref); + +underlying_type(unique int named: @namedtype ref, int 
tp: @type ref); + +#keyset[parent, index] +component_types(int parent: @compositetype ref, int index: int ref, string name: string ref, int tp: @type ref); + +array_length(unique int tp: @arraytype ref, string len: string ref); + +type_objects(unique int tp: @type ref, int object: @object ref); + +packages(unique int id: @package, string name: string ref, string path: string ref, int scope: @packagescope ref); + +#keyset[parent, idx] +modexprs(unique int id: @modexpr, int kind: int ref, int parent: @modexprparent ref, int idx: int ref); + +#keyset[parent, idx] +modtokens(string token: string ref, int parent: @modexpr ref, int idx: int ref); + +#keyset[package, idx] +errors(unique int id: @error, int kind: int ref, string msg: string ref, string rawpos: string ref, + string file: string ref, int line: int ref, int col: int ref, int package: @package ref, int idx: int ref); + +has_ellipsis(int id: @callorconversionexpr ref); + +@container = @file | @folder; + +@locatable = @xmllocatable | @node | @localscope; + +@node = @documentable | @exprparent | @modexprparent | @fieldparent | @stmtparent | @declparent | @scopenode + | @comment_group | @comment; + +@documentable = @file | @field | @spec | @gendecl | @funcdecl | @modexpr; + +@exprparent = @funcdef | @file | @expr | @field | @stmt | @decl | @spec; + +@modexprparent = @file | @modexpr; + +@fieldparent = @decl | @structtypeexpr | @functypeexpr | @interfacetypeexpr; + +@stmtparent = @funcdef | @stmt | @decl; + +@declparent = @file | @declstmt; + +@funcdef = @funclit | @funcdecl; + +@scopenode = @file | @functypeexpr | @blockstmt | @ifstmt | @caseclause | @switchstmt | @commclause | @loopstmt; + +@location = @location_default; + +@sourceline = @locatable; + +case @comment.kind of + 0 = @slashslashcomment +| 1 = @slashstarcomment; + +case @expr.kind of + 0 = @badexpr +| 1 = @ident +| 2 = @ellipsis +| 3 = @intlit +| 4 = @floatlit +| 5 = @imaglit +| 6 = @charlit +| 7 = @stringlit +| 8 = @funclit +| 9 = @compositelit +| 10 
= @parenexpr +| 11 = @selectorexpr +| 12 = @indexexpr +| 13 = @sliceexpr +| 14 = @typeassertexpr +| 15 = @callorconversionexpr +| 16 = @starexpr +| 17 = @keyvalueexpr +| 18 = @arraytypeexpr +| 19 = @structtypeexpr +| 20 = @functypeexpr +| 21 = @interfacetypeexpr +| 22 = @maptypeexpr +| 23 = @plusexpr +| 24 = @minusexpr +| 25 = @notexpr +| 26 = @complementexpr +| 27 = @derefexpr +| 28 = @addressexpr +| 29 = @arrowexpr +| 30 = @lorexpr +| 31 = @landexpr +| 32 = @eqlexpr +| 33 = @neqexpr +| 34 = @lssexpr +| 35 = @leqexpr +| 36 = @gtrexpr +| 37 = @geqexpr +| 38 = @addexpr +| 39 = @subexpr +| 40 = @orexpr +| 41 = @xorexpr +| 42 = @mulexpr +| 43 = @quoexpr +| 44 = @remexpr +| 45 = @shlexpr +| 46 = @shrexpr +| 47 = @andexpr +| 48 = @andnotexpr +| 49 = @sendchantypeexpr +| 50 = @recvchantypeexpr +| 51 = @sendrcvchantypeexpr; + +@basiclit = @intlit | @floatlit | @imaglit | @charlit | @stringlit; + +@operatorexpr = @logicalexpr | @arithmeticexpr | @bitwiseexpr | @unaryexpr | @binaryexpr; + +@logicalexpr = @logicalunaryexpr | @logicalbinaryexpr; + +@arithmeticexpr = @arithmeticunaryexpr | @arithmeticbinaryexpr; + +@bitwiseexpr = @bitwiseunaryexpr | @bitwisebinaryexpr; + +@unaryexpr = @logicalunaryexpr | @bitwiseunaryexpr | @arithmeticunaryexpr | @derefexpr | @addressexpr | @arrowexpr; + +@logicalunaryexpr = @notexpr; + +@bitwiseunaryexpr = @complementexpr; + +@arithmeticunaryexpr = @plusexpr | @minusexpr; + +@binaryexpr = @logicalbinaryexpr | @bitwisebinaryexpr | @arithmeticbinaryexpr | @comparison; + +@logicalbinaryexpr = @lorexpr | @landexpr; + +@bitwisebinaryexpr = @shiftexpr | @orexpr | @xorexpr | @andexpr | @andnotexpr; + +@arithmeticbinaryexpr = @addexpr | @subexpr | @mulexpr | @quoexpr | @remexpr; + +@shiftexpr = @shlexpr | @shrexpr; + +@comparison = @equalitytest | @relationalcomparison; + +@equalitytest = @eqlexpr | @neqexpr; + +@relationalcomparison = @lssexpr | @leqexpr | @gtrexpr | @geqexpr; + +@chantypeexpr = @sendchantypeexpr | @recvchantypeexpr | 
@sendrcvchantypeexpr; + +case @stmt.kind of + 0 = @badstmt +| 1 = @declstmt +| 2 = @emptystmt +| 3 = @labeledstmt +| 4 = @exprstmt +| 5 = @sendstmt +| 6 = @incstmt +| 7 = @decstmt +| 8 = @gostmt +| 9 = @deferstmt +| 10 = @returnstmt +| 11 = @breakstmt +| 12 = @continuestmt +| 13 = @gotostmt +| 14 = @fallthroughstmt +| 15 = @blockstmt +| 16 = @ifstmt +| 17 = @caseclause +| 18 = @exprswitchstmt +| 19 = @typeswitchstmt +| 20 = @commclause +| 21 = @selectstmt +| 22 = @forstmt +| 23 = @rangestmt +| 24 = @assignstmt +| 25 = @definestmt +| 26 = @addassignstmt +| 27 = @subassignstmt +| 28 = @mulassignstmt +| 29 = @quoassignstmt +| 30 = @remassignstmt +| 31 = @andassignstmt +| 32 = @orassignstmt +| 33 = @xorassignstmt +| 34 = @shlassignstmt +| 35 = @shrassignstmt +| 36 = @andnotassignstmt; + +@incdecstmt = @incstmt | @decstmt; + +@assignment = @simpleassignstmt | @compoundassignstmt; + +@simpleassignstmt = @assignstmt | @definestmt; + +@compoundassignstmt = @addassignstmt | @subassignstmt | @mulassignstmt | @quoassignstmt | @remassignstmt + | @andassignstmt | @orassignstmt | @xorassignstmt | @shlassignstmt | @shrassignstmt | @andnotassignstmt; + +@branchstmt = @breakstmt | @continuestmt | @gotostmt | @fallthroughstmt; + +@switchstmt = @exprswitchstmt | @typeswitchstmt; + +@loopstmt = @forstmt | @rangestmt; + +case @decl.kind of + 0 = @baddecl +| 1 = @importdecl +| 2 = @constdecl +| 3 = @typedecl +| 4 = @vardecl +| 5 = @funcdecl; + +@gendecl = @importdecl | @constdecl | @typedecl | @vardecl; + +case @spec.kind of + 0 = @importspec +| 1 = @valuespec +| 2 = @typedefspec +| 3 = @aliasspec; + +@typespec = @typedefspec | @aliasspec; + +case @object.kind of + 0 = @pkgobject +| 1 = @decltypeobject +| 2 = @builtintypeobject +| 3 = @declconstobject +| 4 = @builtinconstobject +| 5 = @declvarobject +| 6 = @declfunctionobject +| 7 = @builtinfunctionobject +| 8 = @labelobject; + +@declobject = @decltypeobject | @declconstobject | @declvarobject | @declfunctionobject; + +@builtinobject = 
@builtintypeobject | @builtinconstobject | @builtinfunctionobject; + +@typeobject = @decltypeobject | @builtintypeobject; + +@valueobject = @constobject | @varobject | @functionobject; + +@constobject = @declconstobject | @builtinconstobject; + +@varobject = @declvarobject; + +@functionobject = @declfunctionobject | @builtinfunctionobject; + +case @scope.kind of + 0 = @universescope +| 1 = @packagescope +| 2 = @localscope; + +case @type.kind of + 0 = @invalidtype +| 1 = @boolexprtype +| 2 = @inttype +| 3 = @int8type +| 4 = @int16type +| 5 = @int32type +| 6 = @int64type +| 7 = @uinttype +| 8 = @uint8type +| 9 = @uint16type +| 10 = @uint32type +| 11 = @uint64type +| 12 = @uintptrtype +| 13 = @float32type +| 14 = @float64type +| 15 = @complex64type +| 16 = @complex128type +| 17 = @stringexprtype +| 18 = @unsafepointertype +| 19 = @boolliteraltype +| 20 = @intliteraltype +| 21 = @runeliteraltype +| 22 = @floatliteraltype +| 23 = @complexliteraltype +| 24 = @stringliteraltype +| 25 = @nilliteraltype +| 26 = @arraytype +| 27 = @slicetype +| 28 = @structtype +| 29 = @pointertype +| 30 = @interfacetype +| 31 = @tupletype +| 32 = @signaturetype +| 33 = @maptype +| 34 = @sendchantype +| 35 = @recvchantype +| 36 = @sendrcvchantype +| 37 = @namedtype; + +@basictype = @booltype | @numerictype | @stringtype | @literaltype | @invalidtype | @unsafepointertype; + +@booltype = @boolexprtype | @boolliteraltype; + +@numerictype = @integertype | @floattype | @complextype; + +@integertype = @signedintegertype | @unsignedintegertype; + +@signedintegertype = @inttype | @int8type | @int16type | @int32type | @int64type | @intliteraltype | @runeliteraltype; + +@unsignedintegertype = @uinttype | @uint8type | @uint16type | @uint32type | @uint64type | @uintptrtype; + +@floattype = @float32type | @float64type | @floatliteraltype; + +@complextype = @complex64type | @complex128type | @complexliteraltype; + +@stringtype = @stringexprtype | @stringliteraltype; + +@literaltype = @boolliteraltype | 
@intliteraltype | @runeliteraltype | @floatliteraltype | @complexliteraltype + | @stringliteraltype | @nilliteraltype; + +@compositetype = @containertype | @structtype | @pointertype | @interfacetype | @tupletype | @signaturetype | @namedtype; + +@containertype = @arraytype | @slicetype | @maptype | @chantype; + +@chantype = @sendchantype | @recvchantype | @sendrcvchantype; + +case @modexpr.kind of + 0 = @modcommentblock +| 1 = @modline +| 2 = @modlineblock +| 3 = @modlparen +| 4 = @modrparen; + +case @error.kind of + 0 = @unknownerror +| 1 = @listerror +| 2 = @parseerror +| 3 = @typeerror; + diff --git a/repo-tests/codeql-go/ql/lib/go.qll b/repo-tests/codeql-go/ql/lib/go.qll new file mode 100644 index 00000000000..7ebaa8a0626 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/go.qll @@ -0,0 +1,64 @@ +/** + * Provides classes for working with Go programs. + */ + +import Customizations +import semmle.go.Architectures +import semmle.go.AST +import semmle.go.Comments +import semmle.go.Concepts +import semmle.go.Decls +import semmle.go.Errors +import semmle.go.Expr +import semmle.go.Files +import semmle.go.GoMod +import semmle.go.HTML +import semmle.go.Locations +import semmle.go.Packages +import semmle.go.Scopes +import semmle.go.Stmt +import semmle.go.StringOps +import semmle.go.Types +import semmle.go.Util +import semmle.go.VariableWithFields +import semmle.go.controlflow.BasicBlocks +import semmle.go.controlflow.ControlFlowGraph +import semmle.go.controlflow.IR +import semmle.go.dataflow.DataFlow +import semmle.go.dataflow.DataFlow2 +import semmle.go.dataflow.GlobalValueNumbering +import semmle.go.dataflow.SSA +import semmle.go.dataflow.TaintTracking +import semmle.go.dataflow.TaintTracking2 +import semmle.go.frameworks.Beego +import semmle.go.frameworks.BeegoOrm +import semmle.go.frameworks.Chi +import semmle.go.frameworks.Couchbase +import semmle.go.frameworks.Echo +import semmle.go.frameworks.ElazarlGoproxy +import semmle.go.frameworks.Email +import 
semmle.go.frameworks.Encoding +import semmle.go.frameworks.EvanphxJsonPatch +import semmle.go.frameworks.Gin +import semmle.go.frameworks.Glog +import semmle.go.frameworks.GoRestfulHttp +import semmle.go.frameworks.K8sIoApimachineryPkgRuntime +import semmle.go.frameworks.K8sIoApiCoreV1 +import semmle.go.frameworks.K8sIoClientGo +import semmle.go.frameworks.Logrus +import semmle.go.frameworks.Macaron +import semmle.go.frameworks.Mux +import semmle.go.frameworks.NoSQL +import semmle.go.frameworks.Protobuf +import semmle.go.frameworks.Revel +import semmle.go.frameworks.Spew +import semmle.go.frameworks.SQL +import semmle.go.frameworks.Stdlib +import semmle.go.frameworks.SystemCommandExecutors +import semmle.go.frameworks.Testing +import semmle.go.frameworks.WebSocket +import semmle.go.frameworks.XNetHtml +import semmle.go.frameworks.XPath +import semmle.go.frameworks.Yaml +import semmle.go.frameworks.Zap +import semmle.go.security.FlowSources diff --git a/repo-tests/codeql-go/ql/lib/ideContextual.qll b/repo-tests/codeql-go/ql/lib/ideContextual.qll new file mode 100644 index 00000000000..b729aa81c8f --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/ideContextual.qll @@ -0,0 +1,23 @@ +/** + * Provides classes and predicates related to contextual queries + * in the code viewer. + */ + +import go + +/** + * Returns the `File` matching the given source file name as encoded by the VS + * Code extension. + */ +cached +File getFileBySourceArchiveName(string name) { + // The name provided for a file in the source archive by the VS Code extension + // has some differences from the absolute path in the database: + // 1. colons are replaced by underscores + // 2. there's a leading slash, even for Windows paths: "C:/foo/bar" -> + // "/C_/foo/bar" + // 3. double slashes in UNC prefixes are replaced with a single slash + // We can handle 2 and 3 together by unconditionally adding a leading slash + // before replacing double slashes. 
+ name = ("/" + result.getAbsolutePath().replaceAll(":", "_")).replaceAll("//", "/") +} diff --git a/repo-tests/codeql-go/ql/lib/localDefinitions.ql b/repo-tests/codeql-go/ql/lib/localDefinitions.ql new file mode 100644 index 00000000000..0ef61cbfdd3 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/localDefinitions.ql @@ -0,0 +1,20 @@ +/** + * @name Jump-to-definition links + * @description Generates use-definition pairs that provide the data + * for jump-to-definition in the code viewer. + * @kind definitions + * @id go/ide-jump-to-definition + * @tags ide-contextual-queries/local-definitions + */ + +import go +import ideContextual + +external string selectedSourceFile(); + +from Ident def, Ident use, Entity e +where + use.uses(e) and + def.declares(e) and + use.getFile() = getFileBySourceArchiveName(selectedSourceFile()) +select use, def, "V" diff --git a/repo-tests/codeql-go/ql/lib/localReferences.ql b/repo-tests/codeql-go/ql/lib/localReferences.ql new file mode 100644 index 00000000000..c0ed61ddfce --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/localReferences.ql @@ -0,0 +1,20 @@ +/** + * @name Find-references links + * @description Generates use-definition pairs that provide the data + * for find-references in the code viewer. + * @kind definitions + * @id go/ide-find-references + * @tags ide-contextual-queries/local-references + */ + +import go +import ideContextual + +external string selectedSourceFile(); + +from Ident def, Ident use, Entity e +where + use.uses(e) and + def.declares(e) and + def.getFile() = getFileBySourceArchiveName(selectedSourceFile()) +select use, def, "V" diff --git a/repo-tests/codeql-go/ql/lib/printAst.ql b/repo-tests/codeql-go/ql/lib/printAst.ql new file mode 100644 index 00000000000..9d6d5c2d7b9 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/printAst.ql @@ -0,0 +1,30 @@ +/** + * @name Print AST + * @description Outputs a representation of a file's Abstract Syntax Tree. This + * query is used by the VS Code extension. 
+ * @id go/print-ast + * @kind graph + * @tags ide-contextual-queries/print-ast + */ + +import go +import semmle.go.PrintAst +import ideContextual + +/** + * The source file to generate an AST from. + */ +external string selectedSourceFile(); + +/** + * Hook to customize the functions printed by this query. + */ +class Cfg extends PrintAstConfiguration { + override predicate shouldPrintFunction(FuncDecl func) { shouldPrintFile(func.getFile()) } + + override predicate shouldPrintFile(File file) { + file = getFileBySourceArchiveName(selectedSourceFile()) + } + + override predicate shouldPrintComments(File file) { none() } +} diff --git a/repo-tests/codeql-go/ql/lib/qlpack.yml b/repo-tests/codeql-go/ql/lib/qlpack.yml new file mode 100644 index 00000000000..b61c85d98a9 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/qlpack.yml @@ -0,0 +1,7 @@ +name: codeql/go-all +version: 0.0.2 +dbscheme: go.dbscheme +extractor: go +library: true +dependencies: + codeql/go-upgrades: ^0.0.2 diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/AST.qll b/repo-tests/codeql-go/ql/lib/semmle/go/AST.qll new file mode 100644 index 00000000000..6ce205aefae --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/AST.qll @@ -0,0 +1,234 @@ +/** + * Provides classes for working with AST nodes. + */ + +import go + +/** + * An AST node. + */ +class AstNode extends @node, Locatable { + /** + * Gets the `i`th child node of this node. + * + * Note that the precise indices of child nodes are considered an implementation detail + * and are subject to change without notice. 
+ */ + AstNode getChild(int i) { + result = this.(ExprParent).getChildExpr(i) or + result = this.(GoModExprParent).getChildGoModExpr(i) or + result = this.(StmtParent).getChildStmt(i) or + result = this.(DeclParent).getDecl(i) or + result = this.(GenDecl).getSpec(i) or + result = this.(FieldParent).getField(i) or + result = this.(File).getCommentGroup(i) or + result = this.(CommentGroup).getComment(i) + } + + /** + * Gets a child node of this node. + */ + AstNode getAChild() { result = getChild(_) } + + /** + * Gets the number of child nodes of this node. + */ + int getNumChild() { result = count(getAChild()) } + + /** + * Gets a child with the given index and of the given kind, if one exists. + * Note that a given parent can have multiple children with the same index but differing kind. + */ + private AstNode getChildOfKind(string kind, int i) { + kind = "expr" and result = this.(ExprParent).getChildExpr(i) + or + kind = "gomodexpr" and result = this.(GoModExprParent).getChildGoModExpr(i) + or + kind = "stmt" and result = this.(StmtParent).getChildStmt(i) + or + kind = "decl" and result = this.(DeclParent).getDecl(i) + or + kind = "spec" and result = this.(GenDecl).getSpec(i) + or + kind = "field" and result = this.(FieldParent).getField(i) + or + kind = "commentgroup" and result = this.(File).getCommentGroup(i) + or + kind = "comment" and result = this.(CommentGroup).getComment(i) + } + + /** + * Get an AstNode child, ordered by child kind and then by index. + */ + AstNode getUniquelyNumberedChild(int index) { + result = + rank[index + 1](AstNode child, string kind, int i | + child = getChildOfKind(kind, i) + | + child order by kind, i + ) + } + + /** Gets the parent node of this AST node, if any. */ + AstNode getParent() { this = result.getAChild() } + + /** Gets the parent node of this AST node, but without crossing function boundaries. 
*/ + private AstNode parentInSameFunction() { + result = getParent() and + not this instanceof FuncDef + } + + /** Gets the innermost function definition to which this AST node belongs, if any. */ + FuncDef getEnclosingFunction() { result = getParent().parentInSameFunction*() } + + /** + * Gets a comma-separated list of the names of the primary CodeQL classes to which this element belongs. + */ + final string getPrimaryQlClasses() { result = concat(getAPrimaryQlClass(), ",") } + + /** + * Gets the name of a primary CodeQL class to which this node belongs. + * + * For most nodes, this is simply the most precise syntactic category to which they belong; + * for example, `AddExpr` is a primary class, but `BinaryExpr` is not. + * + * For identifiers and selector expressions, the class describing what kind of entity they refer + * to (for example `FunctionName` or `TypeName`) is also considered primary. For such nodes, + * this predicate has multiple values. + */ + string getAPrimaryQlClass() { result = "???" } + + override string toString() { result = "AST node" } +} + +/** + * An AST node whose children include expressions. + */ +class ExprParent extends @exprparent, AstNode { + /** + * Gets the `i`th child expression of this node. + * + * Note that the precise indices of child expressions are considered an implementation detail + * and are subject to change without notice. + */ + Expr getChildExpr(int i) { exprs(result, _, this, i) } + + /** + * Gets an expression that is a child node of this node in the AST. + */ + Expr getAChildExpr() { result = getChildExpr(_) } + + /** + * Gets the number of child expressions of this node. + */ + int getNumChildExpr() { result = count(getAChildExpr()) } +} + +/** + * An AST node whose children include go.mod expressions. + */ +class GoModExprParent extends @modexprparent, AstNode { + /** + * Gets the `i`th child expression of this node. 
+ * + * Note that the precise indices of child expressions are considered an implementation detail + * and are subject to change without notice. + */ + GoModExpr getChildGoModExpr(int i) { modexprs(result, _, this, i) } + + /** + * Gets an expression that is a child node of this node in the AST. + */ + GoModExpr getAChildGoModExpr() { result = getChildGoModExpr(_) } + + /** + * Gets the number of child expressions of this node. + */ + int getNumChildGoModExpr() { result = count(getAChildGoModExpr()) } +} + +/** + * An AST node whose children include statements. + */ +class StmtParent extends @stmtparent, AstNode { + /** + * Gets the `i`th child statement of this node. + * + * Note that the precise indices of child statements are considered an implementation detail + * and are subject to change without notice. + */ + Stmt getChildStmt(int i) { stmts(result, _, this, i) } + + /** + * Gets a statement that is a child node of this node in the AST. + */ + Stmt getAChildStmt() { result = getChildStmt(_) } + + /** + * Gets the number of child statements of this node. + */ + int getNumChildStmt() { result = count(getAChildStmt()) } +} + +/** + * An AST node whose children include declarations. + */ +class DeclParent extends @declparent, AstNode { + /** + * Gets the `i`th child declaration of this node. + * + * Note that the precise indices of declarations are considered an implementation detail + * and are subject to change without notice. + */ + Decl getDecl(int i) { decls(result, _, this, i) } + + /** + * Gets a child declaration of this node in the AST. + */ + Decl getADecl() { result = getDecl(_) } + + /** + * Gets the number of child declarations of this node. + */ + int getNumDecl() { result = count(getADecl()) } +} + +/** + * An AST node whose children include fields. + */ +class FieldParent extends @fieldparent, AstNode { + /** + * Gets the `i`th field of this node. 
+ * + * Note that the precise indices of fields are considered an implementation detail + * and are subject to change without notice. + */ + FieldBase getField(int i) { fields(result, this, i) } + + /** + * Gets a child field of this node in the AST. + */ + FieldBase getAField() { result = getField(_) } + + /** + * Gets the number of child fields of this node. + */ + int getNumFields() { result = count(getAField()) } +} + +/** + * An AST node which may induce a scope. + * + * The following nodes may induce scopes: + * + * - files + * - block statements, `if` statements, `switch` statements, `case` clauses, comm clauses, loops + * - function type expressions + * + * Note that functions themselves do not induce a scope, it is their type declaration that induces + * the scope. + */ +class ScopeNode extends @scopenode, AstNode { + /** Gets the scope induced by this node, if any. */ + LocalScope getScope() { scopenodes(this, result) } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/Architectures.qll b/repo-tests/codeql-go/ql/lib/semmle/go/Architectures.qll new file mode 100644 index 00000000000..e509de4bc68 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/Architectures.qll @@ -0,0 +1,37 @@ +/** Provides classes for working with architectures. */ + +import go + +/** + * An architecture that is valid in a build constraint. + * + * Information obtained from + * https://github.com/golang/go/blob/e125ccd10ea191101dbc31f0dd39a98f9d3ab929/src/go/types/gccgosizes.go + * where the first field of the struct is 4 for 32-bit architectures + * and 8 for 64-bit architectures. 
+ */ +class Architecture extends string { + int bitSize; + + Architecture() { + this in [ + "386", "amd64p32", "arm", "armbe", "m64k", "mips", "mipsle", "mips64p32", "mips64p32le", + "nios2", "ppc", "riscv", "s390", "sh", "shbe", "sparc" + ] and + bitSize = 32 + or + this in [ + "alpha", "amd64", "arm64", "arm64be", "ia64", "mips64", "mips64le", "ppc64", "ppc64le", + "riscv64", "s390x", "sparc64", "wasm" + ] and + bitSize = 64 + } + + /** + * Gets the integer and pointer type width for this architecture. + * + * As of the time of writing, this appears to always be identical -- there aren't + * Go architectures with 64-bit pointers but 32-bit ints, for example. + */ + int getBitSize() { result = bitSize } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/Comments.qll b/repo-tests/codeql-go/ql/lib/semmle/go/Comments.qll new file mode 100644 index 00000000000..e072c0cb7ca --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/Comments.qll @@ -0,0 +1,226 @@ +/** + * Provides classes for working with code comments. + */ + +import go + +/** + * A code comment. + * + * Examples: + * + *
+ * // a line comment
+ * /* a block
+ *   comment *&#47;
+ * 
+ */ +class Comment extends @comment, AstNode { + /** + * Gets the text of this comment, not including delimiters. + */ + string getText() { comments(this, _, _, _, result) } + + /** + * Gets the comment group to which this comment belongs. + */ + CommentGroup getGroup() { this = result.getAComment() } + + override string toString() { result = "comment" } + + override string getAPrimaryQlClass() { result = "Comment" } +} + +/** + * A comment group, that is, a sequence of comments without any intervening tokens or + * empty lines. + * + * Examples: + * + *
+ * // a line comment
+ * // another line comment
+ *
+ * // a line comment
+ * /* a block
+ *   comment *&#47;
+ *
+ * /* a block
+ * comment *&#47;
+ * /* another block comment *&#47;
+ * 
+ */ +class CommentGroup extends @comment_group, AstNode { + /** + * Gets the file to which this comment group belongs. + */ + override File getParent() { this = result.getACommentGroup() } + + /** Gets the `i`th comment in this group (0-based indexing). */ + Comment getComment(int i) { comments(result, _, this, i, _) } + + /** Gets a comment in this group. */ + Comment getAComment() { result = getComment(_) } + + /** Gets the number of comments in this group. */ + int getNumComment() { result = count(getAComment()) } + + override string toString() { result = "comment group" } + + override string getAPrimaryQlClass() { result = "CommentGroup" } +} + +/** + * A program element to which a documentation comment group may be attached: + * a file, a field, a specifier, a generic declaration, a function declaration + * or a go.mod expression. + * + * Examples: + * + * ```go + * // function documentation + * func double(x int) int { return 2 * x } + * + * // generic declaration documentation + * const ( + * // specifier documentation + * size int64 = 1024 + * eof = -1 // not specifier documentation + * ) + * ``` + */ +class Documentable extends AstNode, @documentable { + /** Gets the documentation comment group attached to this element, if any. */ + DocComment getDocumentation() { this = result.getDocumentedElement() } +} + +/** + * A comment group that is attached to a program element as documentation. + * + * Examples: + * + * ```go + * // function documentation + * func double(x int) int { return 2 * x } + * + * // generic declaration documentation + * const ( + * // specifier documentation + * size int64 = 1024 + * eof = -1 // not specifier documentation + * ) + * ``` + */ +class DocComment extends CommentGroup { + Documentable node; + + DocComment() { doc_comments(node, this) } + + /** Gets the program element documented by this comment group. 
*/ + Documentable getDocumentedElement() { result = node } + + override string getAPrimaryQlClass() { result = "DocComment" } +} + +/** + * A single-line comment starting with `//`. + * + * Examples: + * + * ```go + * // Single line comment + * ``` + */ +class SlashSlashComment extends @slashslashcomment, Comment { + override string getAPrimaryQlClass() { result = "SlashSlashComment" } +} + +/** + * A block comment starting with `/*` and ending with <code>*&#47;</code>. + * + * Examples: + * + *
+ * /* a block
+ *   comment *&#47;
+ *
+ */ +class SlashStarComment extends @slashstarcomment, Comment { + override string getAPrimaryQlClass() { result = "SlashStarComment" } +} + +/** + * A single-line comment starting with `//`. + * + * Examples: + * + * ```go + * // Single line comment + * ``` + */ +class LineComment = SlashSlashComment; + +/** + * A block comment starting with `/*` and ending with <code>*&#47;</code>. + * + * Examples: + * + *
+ * /* a block
+ *   comment *&#47;
+ * 
+ */ +class BlockComment = SlashStarComment; + +/** Holds if `c` starts at `line`, `col` in `f`, and precedes the package declaration. */ +private predicate isInitialComment(Comment c, File f, int line, int col) { + c.hasLocationInfo(f.getAbsolutePath(), line, col, _, _) and + line < f.getPackageNameExpr().getLocation().getStartLine() +} + +/** Gets the `i`th initial comment in `f` (0-based). */ +private Comment getInitialComment(File f, int i) { + result = + rank[i + 1](Comment c, int line, int col | + isInitialComment(c, f, line, col) + | + c order by line, col + ) +} + +/** + * A build constraint comment of the form `// +build ...` or `//go:build ...`. + * + * Examples: + * + * ```go + * // +build darwin freebsd netbsd openbsd + * // +build !linux + * ``` + */ +class BuildConstraintComment extends LineComment { + BuildConstraintComment() { + // a line comment preceding the package declaration, itself only preceded by + // line comments + exists(File f, int i | + // correctness of the placement of the build constraint is not checked here; + // this is more lax than the actual rules for build constraints + this = getInitialComment(f, i) and + not getInitialComment(f, [0 .. i - 1]) instanceof BlockComment + ) and + ( + // comment text starts with `+build` or `go:build` + this.getText().regexpMatch("\\s*\\+build.*") + or + this.getText().regexpMatch("\\s*go:build.*") + ) + } + + override string getAPrimaryQlClass() { result = "BuildConstraintComment" } + + /** Gets the body of this build constraint. */ + string getConstraintBody() { result = getText().splitAt("build ", 1) } + + /** Gets a disjunct of this build constraint. 
*/ + string getADisjunct() { result = getConstraintBody().splitAt(" ") } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/Concepts.qll b/repo-tests/codeql-go/ql/lib/semmle/go/Concepts.qll new file mode 100644 index 00000000000..f3920c2ec5c --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/Concepts.qll @@ -0,0 +1,475 @@ +/** + * Provides abstract classes representing generic concepts such as file system + * access or system command execution, for which individual framework libraries + * provide concrete subclasses. + */ + +import go +import semmle.go.dataflow.FunctionInputsAndOutputs +import semmle.go.concepts.HTTP +import semmle.go.concepts.GeneratedFile + +/** + * A data-flow node that executes an operating system command, + * for instance by spawning a new process. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `SystemCommandExecution::Range` instead. + */ +class SystemCommandExecution extends DataFlow::Node { + SystemCommandExecution::Range self; + + SystemCommandExecution() { this = self } + + /** Gets the argument that specifies the command to be executed. */ + DataFlow::Node getCommandName() { result = self.getCommandName() } + + /** Holds if this node is sanitized whenever it follows `--` in an argument list. */ + predicate doubleDashIsSanitizing() { self.doubleDashIsSanitizing() } +} + +/** Provides a class for modeling new system-command execution APIs. */ +module SystemCommandExecution { + /** + * A data-flow node that executes an operating system command, + * for instance by spawning a new process. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `SystemCommandExecution` instead. + */ + abstract class Range extends DataFlow::Node { + /** Gets the argument that specifies the command to be executed. */ + abstract DataFlow::Node getCommandName(); + + /** Holds if this node is sanitized whenever it follows `--` in an argument list. 
*/ + predicate doubleDashIsSanitizing() { none() } + } +} + +/** + * An instantiation of a template; that is, a call which fills out a template with data. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `TemplateInstantiation::Range` instead. + */ +class TemplateInstantiation extends DataFlow::Node { + TemplateInstantiation::Range self; + + TemplateInstantiation() { this = self } + + /** + * Gets the argument to this template instantiation that is the template being + * instantiated. + */ + DataFlow::Node getTemplateArgument() { result = self.getTemplateArgument() } + + /** + * Gets an argument to this template instantiation that is data being inserted + * into the template. + */ + DataFlow::Node getADataArgument() { result = self.getADataArgument() } +} + +/** Provides a class for modeling new template-instantiation APIs. */ +module TemplateInstantiation { + /** + * An instantiation of a template; that is, a call which fills out a template with data. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `TemplateInstantiation` instead. + */ + abstract class Range extends DataFlow::Node { + /** + * Gets the argument to this template instantiation that is the template being + * instantiated. + */ + abstract DataFlow::Node getTemplateArgument(); + + /** + * Gets an argument to this template instantiation that is data being inserted + * into the template. + */ + abstract DataFlow::Node getADataArgument(); + } +} + +/** + * A data-flow node that performs a file system access, including reading and writing data, + * creating and deleting files and folders, checking and updating permissions, and so on. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `FileSystemAccess::Range` instead. 
+ */ +class FileSystemAccess extends DataFlow::Node { + FileSystemAccess::Range self; + + FileSystemAccess() { this = self } + + /** Gets an argument to this file system access that is interpreted as a path. */ + DataFlow::Node getAPathArgument() { result = self.getAPathArgument() } +} + +/** Provides a class for modeling new file-system access APIs. */ +module FileSystemAccess { + /** + * A data-flow node that performs a file system access, including reading and writing data, + * creating and deleting files and folders, checking and updating permissions, and so on. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `FileSystemAccess` instead. + */ + abstract class Range extends DataFlow::Node { + /** Gets an argument to this file system access that is interpreted as a path. */ + abstract DataFlow::Node getAPathArgument(); + } +} + +/** A function that escapes meta-characters to prevent injection attacks. */ +class EscapeFunction extends Function { + EscapeFunction::Range self; + + EscapeFunction() { this = self } + + /** + * The context that this function escapes for. + * + * Currently, this can be "js", "html", or "url". + */ + string kind() { result = self.kind() } +} + +/** Provides a class for modeling new escape-function APIs. */ +module EscapeFunction { + /** + * A function that escapes meta-characters to prevent injection attacks. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `EscapeFunction` instead. + */ + abstract class Range extends Function { + /** + * The context that this function escapes for. + * + * Currently, this can be "js", "html", or "url". + */ + abstract string kind(); + } +} + +/** + * A function that escapes a string so it can be safely included in a + * JavaScript string literal.
+ */ +class JsEscapeFunction extends EscapeFunction { + JsEscapeFunction() { self.kind() = "js" } +} + +/** + * A function that escapes a string so it can be safely included in + * the body of an HTML element, for example, replacing `{}` in + * `<p>{}</p>`. + */ +class HtmlEscapeFunction extends EscapeFunction { + HtmlEscapeFunction() { self.kind() = "html" } +} + +/** + * A function that escapes a string so it can be safely included as part + * of a URL. + */ +class UrlEscapeFunction extends EscapeFunction { + UrlEscapeFunction() { self.kind() = "url" } +} + +/** + * A node whose value is interpreted as a part of a regular expression. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `RegexpPattern::Range` instead. + */ +class RegexpPattern extends DataFlow::Node { + RegexpPattern::Range self; + + RegexpPattern() { this = self } + + /** + * Gets the node where this pattern is parsed as a part of a regular + * expression. + */ + DataFlow::Node getAParse() { result = self.getAParse() } + + /** + * Gets this regexp pattern as a string. + */ + string getPattern() { result = self.getPattern() } + + /** + * Gets a use of this pattern, either as itself in an argument to a function or as a compiled + * regexp object. + */ + DataFlow::Node getAUse() { result = self.getAUse() } +} + +/** Provides a class for modeling new regular-expression APIs. */ +module RegexpPattern { + /** + * A node whose value is interpreted as a part of a regular expression. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `RegexpPattern` instead. + */ + abstract class Range extends DataFlow::Node { + /** + * Gets a node where the pattern of this node is parsed as a part of + * a regular expression. + */ + abstract DataFlow::Node getAParse(); + + /** + * Gets this regexp pattern as a string. + */ + abstract string getPattern(); + + /** + * Gets a use of this pattern, either as itself in an argument to a function or as a compiled + * regexp object. + */ + abstract DataFlow::Node getAUse(); + } +} + +/** + * A function that matches a regexp with a string or byte slice. + * + * Extend this class to refine existing API models.
If you want to model new APIs, + * extend `RegexpMatchFunction::Range` instead. + */ +class RegexpMatchFunction extends Function { + RegexpMatchFunction::Range self; + + RegexpMatchFunction() { this = self } + + /** + * Gets the function input that is the regexp being matched. + */ + FunctionInput getRegexpArg() { result = self.getRegexpArg() } + + /** + * Gets the regexp pattern that is used in the call `call` to this function. + */ + RegexpPattern getRegexp(DataFlow::CallNode call) { + result.getAUse() = this.getRegexpArg().getNode(call) + } + + /** + * Gets the function input that is the string being matched against. + */ + FunctionInput getValue() { result = self.getValue() } + + /** + * Gets the function output that is the Boolean result of the match function. + */ + FunctionOutput getResult() { result = self.getResult() } +} + +/** Provides a class for modeling new regular-expression matcher APIs. */ +module RegexpMatchFunction { + /** + * A function that matches a regexp with a string or byte slice. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `RegexpMatchFunction` instead. + */ + abstract class Range extends Function { + /** + * Gets the function input that is the regexp being matched. + */ + abstract FunctionInput getRegexpArg(); + + /** + * Gets the function input that is the string being matched against. + */ + abstract FunctionInput getValue(); + + /** + * Gets the function output that is the Boolean result of the match function. + */ + abstract FunctionOutput getResult(); + } +} + +/** + * A function that uses a regexp to replace parts of a string or byte slice. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `RegexpReplaceFunction::Range` instead. + */ +class RegexpReplaceFunction extends Function { + RegexpReplaceFunction::Range self; + + RegexpReplaceFunction() { this = self } + + /** + * Gets the function input that is the regexp that matches text to replace.
+ */ + FunctionInput getRegexpArg() { result = self.getRegexpArg() } + + /** + * Gets the regexp pattern that is used to match patterns to replace in the call to this function + * `call`. + */ + RegexpPattern getRegexp(DataFlow::CallNode call) { + result.getAUse() = call.(DataFlow::MethodCallNode).getReceiver() + } + + /** + * Gets the function input corresponding to the source value, that is, the value that is having + * its contents replaced. + */ + FunctionInput getSource() { result = self.getSource() } + + /** + * Gets the function output corresponding to the result, that is, the value after replacement has + * occurred. + */ + FunctionOutput getResult() { result = self.getResult() } +} + +/** Provides a class for modeling new regular-expression replacer APIs. */ +module RegexpReplaceFunction { + /** + * A function that uses a regexp to replace parts of a string or byte slice. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `RegexpReplaceFunction` instead. + */ + abstract class Range extends Function { + /** + * Gets the function input that is the regexp that matches text to replace. + */ + abstract FunctionInput getRegexpArg(); + + /** + * Gets the function input corresponding to the source value, that is, the value that is having + * its contents replaced. + */ + abstract FunctionInput getSource(); + + /** + * Gets the function output corresponding to the result, that is, the value after replacement + * has occurred. + */ + abstract FunctionOutput getResult(); + } +} + +/** + * A call to a logging mechanism. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `LoggerCall::Range` instead. + */ +class LoggerCall extends DataFlow::Node { + LoggerCall::Range self; + + LoggerCall() { this = self } + + /** Gets a node that is a part of the logged message.
*/ + DataFlow::Node getAMessageComponent() { result = self.getAMessageComponent() } +} + +/** Provides a class for modeling new logging APIs. */ +module LoggerCall { + /** + * A call to a logging mechanism. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `LoggerCall` instead. + */ + abstract class Range extends DataFlow::Node { + /** Gets a node that is a part of the logged message. */ + abstract DataFlow::Node getAMessageComponent(); + } +} + +/** + * A function that encodes data into a binary or textual format. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `MarshalingFunction::Range` instead. + */ +class MarshalingFunction extends Function { + MarshalingFunction::Range self; + + MarshalingFunction() { this = self } + + /** Gets an input that is encoded by this function. */ + FunctionInput getAnInput() { result = self.getAnInput() } + + /** Gets the output that contains the encoded data produced by this function. */ + FunctionOutput getOutput() { result = self.getOutput() } + + /** Gets an identifier for the format this function encodes into, such as "JSON". */ + string getFormat() { result = self.getFormat() } +} + +/** Provides a class for modeling new marshaling APIs. */ +module MarshalingFunction { + /** + * A function that encodes data into a binary or textual format. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `MarshalingFunction` instead. + */ + abstract class Range extends Function { + /** Gets an input that is encoded by this function. */ + abstract FunctionInput getAnInput(); + + /** Gets the output that contains the encoded data produced by this function. */ + abstract FunctionOutput getOutput(); + + /** Gets an identifier for the format this function encodes into, such as "JSON". */ + abstract string getFormat(); + } +} + +/** + * A function that decodes data from a binary or textual format. 
+ * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `UnmarshalingFunction::Range` instead. + */ +class UnmarshalingFunction extends Function { + UnmarshalingFunction::Range self; + + UnmarshalingFunction() { this = self } + + /** Gets an input that is decoded by this function. */ + FunctionInput getAnInput() { result = self.getAnInput() } + + /** Gets the output that contains the decoded data produced by this function. */ + FunctionOutput getOutput() { result = self.getOutput() } + + /** Gets an identifier for the format this function decodes from, such as "JSON". */ + string getFormat() { result = self.getFormat() } +} + +/** Provides a class for modeling new unmarshaling APIs. */ +module UnmarshalingFunction { + /** + * A function that decodes data from a binary or textual format. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `UnmarshalingFunction` instead. + */ + abstract class Range extends Function { + /** Gets an input that is decoded by this function. */ + abstract FunctionInput getAnInput(); + + /** Gets the output that contains the decoded data produced by this function. */ + abstract FunctionOutput getOutput(); + + /** Gets an identifier for the format this function decodes from, such as "JSON". */ + abstract string getFormat(); + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/Decls.qll b/repo-tests/codeql-go/ql/lib/semmle/go/Decls.qll new file mode 100644 index 00000000000..512ce6ddac5 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/Decls.qll @@ -0,0 +1,610 @@ +/** + * Provides classes for working with declarations. + */ + +import go + +/** + * A declaration. + */ +class Decl extends @decl, ExprParent, StmtParent, FieldParent { + /** + * Gets the kind of this declaration, which is an integer value representing the declaration's + * node type. 
+ * + * Note that the mapping from node types to integer kinds is considered an implementation detail + * and subject to change without notice. + */ + int getKind() { decls(this, result, _, _) } + + /** + * Holds if the execution of this statement may produce observable side effects. + * + * Memory allocation is not considered an observable side effect. + */ + predicate mayHaveSideEffects() { none() } +} + +/** + * A bad declaration, that is, a declaration that cannot be parsed. + */ +class BadDecl extends @baddecl, Decl { + override string toString() { result = "bad declaration" } + + override string getAPrimaryQlClass() { result = "BadDecl" } +} + +/** + * A generic declaration. + */ +class GenDecl extends @gendecl, Decl, Documentable { + /** Gets the `i`th declaration specifier in this declaration (0-based). */ + Spec getSpec(int i) { specs(result, _, this, i) } + + /** Gets a declaration specifier in this declaration. */ + Spec getASpec() { result = getSpec(_) } + + /** Gets the number of declaration specifiers in this declaration. */ + int getNumSpec() { result = count(getASpec()) } + + override predicate mayHaveSideEffects() { getASpec().mayHaveSideEffects() } + + override string getAPrimaryQlClass() { result = "GenDecl" } +} + +/** + * An import declaration. + */ +class ImportDecl extends @importdecl, GenDecl { + override string toString() { result = "import declaration" } + + override string getAPrimaryQlClass() { result = "ImportDecl" } +} + +/** + * A constant declaration. + */ +class ConstDecl extends @constdecl, GenDecl { + override string toString() { result = "constant declaration" } + + override string getAPrimaryQlClass() { result = "ConstDecl" } +} + +/** + * A type declaration. + */ +class TypeDecl extends @typedecl, GenDecl { + override string toString() { result = "type declaration" } + + override string getAPrimaryQlClass() { result = "TypeDecl" } +} + +/** + * A variable declaration. 
+ */ +class VarDecl extends @vardecl, GenDecl { + override string toString() { result = "variable declaration" } + + override string getAPrimaryQlClass() { result = "VarDecl" } +} + +/** + * A function definition, that is, either a function declaration or + * a function literal. + */ +class FuncDef extends @funcdef, StmtParent, ExprParent { + /** Gets the body of the defined function, if any. */ + BlockStmt getBody() { none() } + + /** Gets the name of the defined function, if any. */ + string getName() { none() } + + /** Gets the expression denoting the type of this function. */ + FuncTypeExpr getTypeExpr() { none() } + + /** Gets the type of this function. */ + SignatureType getType() { none() } + + /** Gets the scope induced by this function. */ + FunctionScope getScope() { result.getFunction() = this } + + /** Gets a `defer` statement in this function. */ + DeferStmt getADeferStmt() { result.getEnclosingFunction() = this } + + /** Gets the `i`th result variable of this function. */ + ResultVariable getResultVar(int i) { result.isResultOf(this, i) } + + /** Gets a result variable of this function. */ + ResultVariable getAResultVar() { result.getFunction() = this } + + /** + * Gets the `i`th parameter of this function. + * + * The receiver variable, if any, is considered to be the -1st parameter. + */ + Parameter getParameter(int i) { result.isParameterOf(this, i) } + + /** Gets a parameter of this function. */ + Parameter getAParameter() { result.getFunction() = this } + + /** + * Gets the number of parameters of this function. + */ + int getNumParameter() { result = count(getAParameter()) } + + /** + * Gets a call to this function. + */ + DataFlow::CallNode getACall() { result.getACallee() = this } + + override string getAPrimaryQlClass() { result = "FuncDef" } +} + +/** + * A function declaration. + */ +class FuncDecl extends @funcdecl, Decl, Documentable, FuncDef { + /** Gets the identifier denoting the name of this function. 
*/ + Ident getNameExpr() { result = getChildExpr(0) } + + override string getName() { result = getNameExpr().getName() } + + override FuncTypeExpr getTypeExpr() { result = getChildExpr(1) } + + override SignatureType getType() { result = getNameExpr().getType() } + + /** Gets the body of this function, if any. */ + override BlockStmt getBody() { result = getChildStmt(2) } + + /** Gets the function declared by this function declaration. */ + DeclaredFunction getFunction() { this = result.getFuncDecl() } + + override string toString() { result = "function declaration" } + + override string getAPrimaryQlClass() { result = "FuncDecl" } +} + +/** + * A method declaration. + */ +class MethodDecl extends FuncDecl { + ReceiverDecl recv; + + MethodDecl() { recv.getFunction() = this } + + /** + * Gets the receiver declaration of this method. + * + * For example, the receiver declaration of + * + * ``` + * func (p *Rectangle) Area() float64 { ... } + * ``` + * + * is `p *Rectangle`. + */ + ReceiverDecl getReceiverDecl() { result = recv } + + /** + * Gets the receiver type of this method. + * + * For example, the receiver type of + * + * ``` + * func (p *Rectangle) Area() float64 { ... } + * ``` + * + * is `*Rectangle`. + */ + Type getReceiverType() { result = getReceiverDecl().getType() } + + /** + * Gets the receiver base type of this method. + * + * For example, the receiver base type of + * + * ``` + * func (p *Rectangle) Area() float64 { ... } + * ``` + * + * is `Rectangle`. + */ + NamedType getReceiverBaseType() { + result = getReceiverType() or + result = getReceiverType().(PointerType).getBaseType() + } + + /** + * Gets the receiver variable of this method. + * + * For example, the receiver variable of + * + * ``` + * func (p *Rectangle) Area() float64 { ... } + * ``` + * + * is the variable `p`. + */ + ReceiverVariable getReceiver() { result.getFunction() = this } + + override string getAPrimaryQlClass() { result = "MethodDecl" } +} + +/** + * A declaration specifier. 
+ */ +class Spec extends @spec, ExprParent, Documentable { + /** Gets the declaration to which this specifier belongs */ + Decl getParentDecl() { specs(this, _, result, _) } + + /** + * Gets the kind of this specifier, which is an integer value representing the specifier's + * node type. + * + * Note that the mapping from node types to integer kinds is considered an implementation detail + * and subject to change without notice. + */ + int getKind() { specs(this, result, _, _) } + + /** + * Holds if the execution of this specifier may produce observable side effects. + * + * Memory allocation is not considered an observable side effect. + */ + predicate mayHaveSideEffects() { none() } + + override string getAPrimaryQlClass() { result = "Spec" } +} + +/** + * An import specifier. + */ +class ImportSpec extends @importspec, Spec { + /** Gets the identifier denoting the imported name. */ + Ident getNameExpr() { result = getChildExpr(0) } + + /** Gets the imported name. */ + string getName() { result = getNameExpr().getName() } + + /** Gets the string literal denoting the imported path. */ + StringLit getPathExpr() { result = getChildExpr(1) } + + /** Gets the imported path. */ + string getPath() { result = getPathExpr().getValue() } + + override string toString() { result = "import specifier" } + + override string getAPrimaryQlClass() { result = "ImportSpec" } +} + +/** + * A constant or variable declaration specifier. + */ +class ValueSpec extends @valuespec, Spec { + /** Gets the identifier denoting the `i`th name declared by this specifier (0-based). */ + Ident getNameExpr(int i) { + i >= 0 and + result = getChildExpr(-(i + 1)) + } + + /** Holds if this specifier is a part of a constant declaration. */ + predicate isConstSpec() { this.getParentDecl() instanceof ConstDecl } + + /** Gets an identifier denoting a name declared by this specifier. */ + Ident getANameExpr() { result = getNameExpr(_) } + + /** Gets the `i`th name declared by this specifier (0-based). 
*/ + string getName(int i) { result = getNameExpr(i).getName() } + + /** Gets a name declared by this specifier. */ + string getAName() { result = getName(_) } + + /** Gets the number of names declared by this specifier. */ + int getNumName() { result = count(getANameExpr()) } + + /** Gets the expression denoting the type of the symbols declared by this specifier. */ + Expr getTypeExpr() { result = getChildExpr(0) } + + /** Gets the `i`th initializer of this specifier (0-based). */ + Expr getInit(int i) { + i >= 0 and + result = getChildExpr(i + 1) + } + + /** Gets an initializer of this specifier. */ + Expr getAnInit() { result = getInit(_) } + + /** Gets the number of initializers of this specifier. */ + int getNumInit() { result = count(getAnInit()) } + + /** Gets the unique initializer of this specifier, if there is only one. */ + Expr getInit() { getNumInit() = 1 and result = getInit(0) } + + /** + * Gets the specifier that contains the initializers for this specifier. + * If this valuespec has initializers, the result is itself. Otherwise, it is the + * last specifier declared before this one that has initializers. + */ + private ValueSpec getEffectiveSpec() { + (exists(this.getAnInit()) or not this.isConstSpec()) and + result = this + or + not exists(this.getAnInit()) and + exists(ConstDecl decl, int idx | + decl = this.getParentDecl() and + decl.getSpec(idx) = this + | + result = decl.getSpec(idx - 1).(ValueSpec).getEffectiveSpec() + ) + } + + /** + * Gets the `i`th effective initializer of this specifier, that is, the expression + * that the `i`th name will get initialized to. This is the same as `getInit` + * if it exists, or `getInit` on the last specifier in the declaration that this + * is a child of. + */ + private Expr getEffectiveInit(int i) { result = this.getEffectiveSpec().getInit(i) } + + /** Holds if this specifier initializes `name` to the value of `init`. 
*/ + predicate initializes(string name, Expr init) { + exists(int i | + name = getName(i) and + init = getEffectiveInit(i) + ) + } + + override predicate mayHaveSideEffects() { getAnInit().mayHaveSideEffects() } + + override string toString() { result = "value declaration specifier" } + + override string getAPrimaryQlClass() { result = "ValueSpec" } +} + +/** + * A type declaration specifier, which is either a type definition or an alias declaration. + * + * Examples: + * + * ``` + * type ( + * status int + * intlist = []int + * ) + * ``` + */ +class TypeSpec extends @typespec, Spec { + /** Gets the identifier denoting the name of the declared type. */ + Ident getNameExpr() { result = getChildExpr(0) } + + /** Gets the name of the declared type. */ + string getName() { result = getNameExpr().getName() } + + /** + * Gets the expression denoting the underlying type to which the newly declared type is bound. + */ + Expr getTypeExpr() { result = getChildExpr(1) } + + override string toString() { result = "type declaration specifier" } + + override string getAPrimaryQlClass() { result = "TypeSpec" } +} + +/** + * An alias declaration specifier. + * + * Examples: + * + * ``` + * type intlist = []int + * ``` + */ +class AliasSpec extends @aliasspec, TypeSpec { } + +/** + * A type definition specifier. + * + * Examples: + * + * ``` + * type status int + * ``` + */ +class TypeDefSpec extends @typedefspec, TypeSpec { } + +/** + * A field declaration, of a struct, a function (in which case this is a parameter or result variable), + * or an interface (in which case this is a method or embedding spec). + */ +class FieldBase extends @field, ExprParent { + /** + * Gets the expression representing the type of the fields declared in this declaration. + */ + Expr getTypeExpr() { result = getChildExpr(0) } + + /** + * Gets the type of the fields declared in this declaration. + */ + Type getType() { result = getTypeExpr().getType() } +} + +/** + * A field declaration in a struct type. 
+ */ +class FieldDecl extends FieldBase, Documentable, ExprParent { + StructTypeExpr st; + + FieldDecl() { this = st.getField(_) } + + /** + * Gets the expression representing the name of the `i`th field declared in this declaration + * (0-based). + */ + Expr getNameExpr(int i) { + i >= 0 and + result = getChildExpr(i + 1) + } + + /** Gets the tag expression of this field declaration, if any. */ + Expr getTag() { result = getChildExpr(-1) } + + /** Gets the struct type expression to which this field declaration belongs. */ + StructTypeExpr getDeclaringStructTypeExpr() { result = st } + + /** Gets the struct type to which this field declaration belongs. */ + StructType getDeclaringType() { result = getDeclaringStructTypeExpr().getType() } + + override string toString() { result = "field declaration" } + + override string getAPrimaryQlClass() { result = "FieldDecl" } +} + +/** + * An embedded field declaration in a struct. + */ +class EmbeddedFieldDecl extends FieldDecl { + EmbeddedFieldDecl() { not exists(this.getNameExpr(_)) } + + override string getAPrimaryQlClass() { result = "EmbeddedFieldDecl" } +} + +/** + * A function parameter or result variable declaration. + */ +class ParameterOrResultDecl extends FieldBase, Documentable, ExprParent { + int rawIndex; + FuncTypeExpr ft; + + ParameterOrResultDecl() { this = ft.getField(rawIndex) } + + /** + * Gets the function type expression to which this declaration belongs. + */ + FuncTypeExpr getFunctionTypeExpr() { result = ft } + + /** + * Gets the function to which this declaration belongs. + */ + FuncDef getFunction() { result.getTypeExpr() = getFunctionTypeExpr() } + + /** + * Gets the expression representing the name of the `i`th variable declared in this declaration + * (0-based). + */ + Expr getNameExpr(int i) { + i >= 0 and + result = getChildExpr(i + 1) + } + + /** + * Gets an expression representing the name of a variable declared in this declaration. 
+ */ + Expr getANameExpr() { result = getNameExpr(_) } +} + +/** + * A parameter declaration. + */ +class ParameterDecl extends ParameterOrResultDecl { + ParameterDecl() { rawIndex >= 0 } + + /** + * Gets the index of this parameter declaration among all parameter declarations of + * its associated function type. + */ + int getIndex() { result = rawIndex } + + override string toString() { result = "parameter declaration" } + + override string getAPrimaryQlClass() { result = "ParameterDecl" } +} + +/** + * A receiver declaration in a function declaration. + */ +class ReceiverDecl extends FieldBase, Documentable, ExprParent { + FuncDecl fd; + + ReceiverDecl() { fd.getField(-1) = this } + + /** + * Gets the function declaration to which this receiver belongs. + */ + FuncDecl getFunction() { result = fd } + + /** + * Gets the expression representing the name of the receiver declared in this declaration. + */ + Expr getNameExpr() { result = getChildExpr(1) } + + override string toString() { result = "receiver declaration" } + + override string getAPrimaryQlClass() { result = "ReceiverDecl" } +} + +/** + * A result variable declaration. + */ +class ResultVariableDecl extends ParameterOrResultDecl { + ResultVariableDecl() { rawIndex < 0 } + + /** + * Gets the index of this result variable declaration among all result variable declarations of + * its associated function type. + */ + int getIndex() { result = -(rawIndex + 1) } + + override string toString() { result = "result variable declaration" } + + override string getAPrimaryQlClass() { result = "ResultVariableDecl" } +} + +/** + * A method or embedding specification in an interface type expression. + */ +class InterfaceMemberSpec extends FieldBase, Documentable, ExprParent { + InterfaceTypeExpr ite; + int idx; + + InterfaceMemberSpec() { this = ite.getField(idx) } + + /** + * Gets the interface type expression to which this member specification belongs.
+ */ + InterfaceTypeExpr getInterfaceTypeExpr() { result = ite } + + /** + * Gets the index of this member specification among all member specifications of + * its associated interface type expression. + */ + int getIndex() { result = idx } +} + +/** + * A method specification in an interface. + */ +class MethodSpec extends InterfaceMemberSpec { + Expr name; + + MethodSpec() { name = getChildExpr(1) } + + /** + * Gets the expression representing the name of the method declared in this specification. + */ + Expr getNameExpr() { result = name } + + override string toString() { result = "method declaration" } + + override string getAPrimaryQlClass() { result = "MethodSpec" } +} + +/** + * An embedding specification in an interface. + */ +class EmbeddingSpec extends InterfaceMemberSpec { + EmbeddingSpec() { not exists(getChildExpr(1)) } + + override string toString() { result = "interface embedding" } + + override string getAPrimaryQlClass() { result = "EmbeddingSpec" } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/Errors.qll b/repo-tests/codeql-go/ql/lib/semmle/go/Errors.qll new file mode 100644 index 00000000000..cf83a87ff15 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/Errors.qll @@ -0,0 +1,53 @@ +/** Provides classes for working with Go frontend errors recorded during extraction. */ + +import go + +/** + * An error reported by the Go frontend during extraction. + */ +class Error extends @error { + /** Gets the message associated with this error. */ + string getMessage() { errors(this, _, result, _, _, _, _, _, _) } + + /** Gets the raw position reported by the frontend for this error. */ + string getRawPosition() { errors(this, _, _, result, _, _, _, _, _) } + + /** Gets the package in which this error was reported. */ + Package getPackage() { errors(this, _, _, _, _, _, _, result, _) } + + /** Gets the index of this error among all errors reported for the same package. 
*/ + int getIndex() { errors(this, _, _, _, _, _, _, _, result) } + + /** Gets the file in which this error was reported, if it can be determined. */ + ExtractedOrExternalFile getFile() { hasLocationInfo(result.getAbsolutePath(), _, _, _, _) } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [LGTM locations](https://lgtm.com/help/ql/locations). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + errors(this, _, _, _, filepath, startline, startcolumn, _, _) and + endline = startline and + endcolumn = startcolumn + } + + /** Gets a textual representation of this error. */ + string toString() { result = getMessage() } +} + +/** An error reported by an unknown part of the Go frontend. */ +class UnknownError extends Error, @unknownerror { } + +/** An error reported by the Go frontend driver. */ +class ListError extends Error, @listerror { } + +/** An error reported by the Go parser. */ +class ParseError extends Error, @parseerror { } + +/** An error reported by the Go type checker. */ +class TypeError extends Error, @typeerror { } diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/Expr.qll b/repo-tests/codeql-go/ql/lib/semmle/go/Expr.qll new file mode 100644 index 00000000000..647d00f315c --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/Expr.qll @@ -0,0 +1,2109 @@ +/** + * Provides classes for working with expressions. + */ + +import go + +/** + * An expression. + * + * Examples: + * + * ```go + * x + 1 + * y < 0 + * ``` + */ +class Expr extends @expr, ExprParent { + /** + * Gets the kind of this expression, which is an integer value representing the expression's + * node type. + * + * Note that the mapping from node types to integer kinds is considered an implementation detail + * and subject to change without notice. 
+ */ + int getKind() { exprs(this, result, _, _) } + + /** Gets this expression, with any surrounding parentheses removed. */ + Expr stripParens() { result = this } + + /** + * Holds if this expression is constant, that is, if its value is determined at + * compile-time. + */ + predicate isConst() { constvalues(this, _, _) } + + /** + * Gets the boolean value this expression evaluates to, if any. + */ + boolean getBoolValue() { + this.getType().getUnderlyingType() instanceof BoolType and + exists(string val | constvalues(this, val, _) | + val = "true" and result = true + or + val = "false" and result = false + ) + } + + /** Gets the floating-point value this expression evaluates to, if any. */ + float getFloatValue() { + this.getType().getUnderlyingType() instanceof FloatType and + exists(string val | constvalues(this, val, _) | result = val.toFloat()) + } + + /** + * Gets the integer value this expression evaluates to, if any. + * + * Note that this does not have a result if the value is too large to fit in a + * 32-bit signed integer type. + */ + int getIntValue() { + this.getType().getUnderlyingType() instanceof IntegerType and + exists(string val | constvalues(this, val, _) | result = val.toInt()) + } + + /** Gets either `getFloatValue` or `getIntValue`. */ + float getNumericValue() { result = this.getFloatValue() or result = this.getIntValue() } + + /** + * Holds if the complex value this expression evaluates to has real part `real` and imaginary + * part `imag`. + */ + predicate hasComplexValue(float real, float imag) { + this.getType().getUnderlyingType() instanceof ComplexType and + exists(string val | constvalues(this, val, _) | + exists(string cmplxre | + cmplxre = "^\\((.+) \\+ (.+)i\\)$" and + real = val.regexpCapture(cmplxre, 1).toFloat() and + imag = val.regexpCapture(cmplxre, 2).toFloat() + ) + ) + } + + /** Gets the string value this expression evaluates to, if any.
*/ + string getStringValue() { + this.getType().getUnderlyingType() instanceof StringType and + constvalues(this, result, _) + } + + /** + * Gets the string representation of the exact value this expression + * evaluates to, if any. + * + * For example, for the constant 3.141592653589793238462, this will + * result in 1570796326794896619231/500000000000000000000 + */ + string getExactValue() { constvalues(this, _, result) } + + /** + * Holds if this expression has a constant value which is guaranteed not to depend on the + * platform where it is evaluated. + * + * This is a conservative approximation, that is, the predicate may fail to hold for expressions + * whose value is platform independent, but it will never hold for expressions whose value is not + * platform independent. + * + * Examples of platform-dependent constants include constants declared in files with build + * constraints, the value of `runtime.GOOS`, and the return value of `unsafe.Sizeof`. + */ + predicate isPlatformIndependentConstant() { none() } + + /** Gets the type of this expression. */ + Type getType() { + type_of(this, result) + or + not type_of(this, _) and + result instanceof InvalidType + } + + /** + * Gets the global value number of this expression. + * + * Expressions with the same global value number are guaranteed to have the same value at runtime. + * The converse does not hold in general, that is, expressions with different global value numbers + * may still have the same value at runtime. + */ + GVN getGlobalValueNumber() { result = globalValueNumber(DataFlow::exprNode(this)) } + + /** + * Holds if this expression may have observable side effects of its own (that is, independent + * of whether its sub-expressions may have side effects). + * + * Memory allocation is not considered an observable side effect. + */ + predicate mayHaveOwnSideEffects() { none() } + + /** + * Holds if the evaluation of this expression may produce observable side effects. 
+ * + * Memory allocation is not considered an observable side effect. + */ + predicate mayHaveSideEffects() { mayHaveOwnSideEffects() or getAChildExpr().mayHaveSideEffects() } + + override string toString() { result = "expression" } +} + +/** + * A bad expression, that is, an expression that could not be parsed. + * + * Examples: + * + * ```go + * x + + * y < + * ``` + */ +class BadExpr extends @badexpr, Expr { + override string toString() { result = "bad expression" } + + override string getAPrimaryQlClass() { result = "BadExpr" } +} + +/** + * An identifier. + * + * Examples: + * + * ```go + * x + * ``` + */ +class Ident extends @ident, Expr { + /** Gets the name of this identifier. */ + string getName() { literals(this, result, _) } + + /** Holds if this identifier is a use of `e`. */ + predicate uses(Entity e) { uses(this, e) } + + /** Holds if this identifier is a definition or declaration of `e` */ + predicate declares(Entity e) { defs(this, e) } + + /** Holds if this identifier refers to (that is, uses, defines or declares) `e`. */ + predicate refersTo(Entity e) { uses(e) or declares(e) } + + override string toString() { result = getName() } + + override string getAPrimaryQlClass() { result = "Ident" } +} + +/** + * The blank identifier `_`. + * + * Examples: + * + * ```go + * _ + * ``` + */ +class BlankIdent extends Ident { + BlankIdent() { getName() = "_" } + + override string getAPrimaryQlClass() { result = "BlankIdent" } +} + +/** + * An ellipsis expression, representing either the `...` type in a parameter list or + * the `...` length in an array type. + * + * Examples: + * + * ```go + * ... + * ``` + */ +class Ellipsis extends @ellipsis, Expr { + /** Gets the operand of this ellipsis expression. */ + Expr getOperand() { result = getChildExpr(0) } + + override string toString() { result = "..." } + + override string getAPrimaryQlClass() { result = "Ellipsis" } +} + +/** + * A literal expression. 
+ * + * Examples: + * + * ```go + * "hello" + * func(x, y int) int { return x + y } + * map[string]int{"A": 1, "B": 2} + * ``` + */ +class Literal extends Expr { + Literal() { + this instanceof @basiclit or this instanceof @funclit or this instanceof @compositelit + } +} + +/** + * A literal expression of basic type. + * + * Examples: + * + * ```go + * 1 + * "hello" + * ``` + */ +class BasicLit extends @basiclit, Literal { + /** Gets the value of this literal expressed as a string. */ + string getValue() { literals(this, result, _) } + + /** Gets the raw program text corresponding to this literal. */ + string getText() { literals(this, _, result) } + + override predicate isConst() { + // override to make sure literals are always considered constants even if we did not get + // information about constant values from the extractor (for example due to missing + // type information) + any() + } + + override predicate isPlatformIndependentConstant() { any() } + + override string toString() { result = getText() } +} + +/** + * An integer literal. + * + * Examples: + * + * ```go + * 256 + * ``` + */ +class IntLit extends @intlit, BasicLit { + override string getAPrimaryQlClass() { result = "IntLit" } +} + +/** + * A floating-point literal. + * + * Examples: + * + * ```go + * 2.71828 + * ``` + */ +class FloatLit extends @floatlit, BasicLit { + override string getAPrimaryQlClass() { result = "FloatLit" } +} + +/** + * An imaginary literal. + * + * Examples: + * + * ```go + * 2i + * 2.7i + * ``` + */ +class ImagLit extends @imaglit, BasicLit { + override string getAPrimaryQlClass() { result = "ImagLit" } +} + +/** + * A rune literal. 
+ * + * Examples: + * + * ```go + * 'a' + * 'ä' + * '本' + * '\377' + * '\xff' + * '\u12e4' + * '\U00101234' + * '\n' + * ``` + */ +class CharLit extends @charlit, BasicLit { + // use the constant value of the literal as the string value, as the value we get from the + // compiler is an integer, meaning we would not otherwise have a string value for rune literals + override string getStringValue() { result = this.getValue() } + + override string getAPrimaryQlClass() { result = "CharLit" } +} + +class RuneLit = CharLit; + +/** + * A string literal. + * + * Examples: + * + * ```go + * "hello world" + * ``` + */ +class StringLit extends @stringlit, BasicLit { + override string getAPrimaryQlClass() { result = "StringLit" } + + /** Holds if this string literal is a raw string literal. */ + predicate isRaw() { this.getText().matches("`%`") } +} + +/** + * A function literal. + * + * Examples: + * + * ```go + * func(x, y int) int { return x + y } + * ``` + */ +class FuncLit extends @funclit, Literal, StmtParent, FuncDef { + override FuncTypeExpr getTypeExpr() { result = getChildExpr(0) } + + override SignatureType getType() { result = Literal.super.getType() } + + /** Gets the body of this function literal. */ + override BlockStmt getBody() { result = getChildStmt(1) } + + override predicate isPlatformIndependentConstant() { any() } + + override string toString() { result = "function literal" } + + override string getAPrimaryQlClass() { result = "FuncLit" } +} + +/** + * A composite literal + * + * Examples: + * + * ```go + * Point3D{0.5, -0.5, 0.5} + * map[string]int{"A": 1, "B": 2} + * ``` + */ +class CompositeLit extends @compositelit, Literal { + /** Gets the expression representing the type of this composite literal. */ + Expr getTypeExpr() { result = getChildExpr(0) } + + /** Gets the `i`th element of this composite literal (0-based). */ + Expr getElement(int i) { + i >= 0 and + result = getChildExpr(i + 1) + } + + /** Gets an element of this composite literal. 
*/ + Expr getAnElement() { result = getElement(_) } + + /** Gets the number of elements in this composite literal. */ + int getNumElement() { result = count(getAnElement()) } + + /** + * Gets the `i`th key expression in this literal. + * + * If the `i`th element of this literal has no key, this predicate is undefined for `i`. + */ + Expr getKey(int i) { result = getElement(i).(KeyValueExpr).getKey() } + + /** + * Gets the `i`th value expression in this literal. + */ + Expr getValue(int i) { + exists(Expr elt | elt = getElement(i) | + result = elt.(KeyValueExpr).getValue() + or + not elt instanceof KeyValueExpr and result = elt + ) + } + + override string toString() { result = "composite literal" } + + override string getAPrimaryQlClass() { result = "CompositeLit" } +} + +/** + * A map literal. + * + * Examples: + * + * ```go + * map[string]int{"A": 1, "B": 2} + * ``` + */ +class MapLit extends CompositeLit { + MapType mt; + + MapLit() { mt = getType().getUnderlyingType() } + + /** Gets the key type of this literal. */ + Type getKeyType() { result = mt.getKeyType() } + + /** Gets the value type of this literal. */ + Type getValueType() { result = mt.getValueType() } + + override string toString() { result = "map literal" } + + override string getAPrimaryQlClass() { result = "MapLit" } +} + +/** + * A struct literal. + * + * Examples: + * + * ```go + * Point3D{0.5, -0.5, 0.5} + * Point3D{y: 1} + * Point3D{} + * ``` + */ +class StructLit extends CompositeLit { + StructType st; + + StructLit() { st = getType().getUnderlyingType() } + + /** Gets the struct type underlying this literal. */ + StructType getStructType() { result = st } + + override string toString() { result = "struct literal" } + + override string getAPrimaryQlClass() { result = "StructLit" } +} + +/** + * An array or slice literal. 
+ * + * Examples: + * + * ```go + * [10]string{} + * [6]int{1, 2, 3, 5} + * [...]string{"Sat", "Sun"} + * []int{1, 2, 3, 5} + * []string{"Sat", "Sun"} + * ``` + */ +class ArrayOrSliceLit extends CompositeLit { + CompositeType type; + + ArrayOrSliceLit() { + type = getType().getUnderlyingType() and + ( + type instanceof ArrayType + or + type instanceof SliceType + ) + } +} + +/** + * An array literal. + * + * Examples: + * + * ```go + * [10]string{} + * [6]int{1, 2, 3, 5} + * [...]string{"Sat", "Sun"} + * ``` + */ +class ArrayLit extends ArrayOrSliceLit { + override ArrayType type; + + /** Gets the array type underlying this literal. */ + ArrayType getArrayType() { result = type } + + override string toString() { result = "array literal" } + + override string getAPrimaryQlClass() { result = "ArrayLit" } +} + +/** + * A slice literal. + * + * Examples: + * + * ```go + * []int{1, 2, 3, 5} + * []string{"Sat", "Sun"} + * ``` + */ +class SliceLit extends ArrayOrSliceLit { + override SliceType type; + + /** Gets the slice type underlying this literal. */ + SliceType getSliceType() { result = type } + + override string toString() { result = "slice literal" } + + override string getAPrimaryQlClass() { result = "SliceLit" } +} + +/** + * A parenthesized expression. + * + * Examples: + * + * ```go + * (x + y) + * ``` + */ +class ParenExpr extends @parenexpr, Expr { + /** Gets the expression between parentheses. */ + Expr getExpr() { result = getChildExpr(0) } + + override Expr stripParens() { result = getExpr().stripParens() } + + override predicate isPlatformIndependentConstant() { getExpr().isPlatformIndependentConstant() } + + override string toString() { result = "(...)" } + + override string getAPrimaryQlClass() { result = "ParenExpr" } +} + +/** + * A selector expression, that is, a base expression followed by a selector. 
+ * + * Examples: + * + * ```go + * x.f + * ``` + */ +class SelectorExpr extends @selectorexpr, Expr { + /** Gets the base of this selector expression. */ + Expr getBase() { result = getChildExpr(0) } + + /** Gets the selector of this selector expression. */ + Ident getSelector() { result = getChildExpr(1) } + + /** Holds if this selector is a use of `e`. */ + predicate uses(Entity e) { getSelector().uses(e) } + + /** Holds if this selector is a definition or declaration of `e`. */ + predicate declares(Entity e) { getSelector().declares(e) } + + /** Holds if this selector refers to (that is, uses, defines or declares) `e`. */ + predicate refersTo(Entity e) { getSelector().refersTo(e) } + + override predicate mayHaveOwnSideEffects() { any() } + + override string toString() { result = "selection of " + getSelector() } + + override string getAPrimaryQlClass() { result = "SelectorExpr" } +} + +/** + * A selector expression that refers to a promoted field or a promoted method. These + * selectors may implicitly address an embedded struct of their base type - for example, + * the selector `x.field` may implicitly address `x.Embedded.field`. Note they may also + * explicitly address `field`; being a `PromotedSelector` only indicates the addressed + * field or method may be promoted, not that it is promoted in this particular context. + */ +class PromotedSelector extends SelectorExpr { + PromotedSelector() { + exists(ValueEntity ve | this.refersTo(ve) | + ve instanceof PromotedField or ve instanceof PromotedMethod + ) + } + + /** + * Gets the underlying struct type of this selector's base. Note because this selector + * addresses a promoted field, the addressed field may not directly occur in the returned + * struct type. 
+ */ + StructType getSelectedStructType() { + exists(Type baseType | baseType = this.getBase().getType().getUnderlyingType() | + pragma[only_bind_into](result) = + [baseType, baseType.(PointerType).getBaseType().getUnderlyingType()] + ) + } +} + +/** + * An index expression, that is, a base expression followed by an index. + * + * Examples: + * + * ```go + * a[i] + * ``` + */ +class IndexExpr extends @indexexpr, Expr { + /** Gets the base of this index expression. */ + Expr getBase() { result = getChildExpr(0) } + + /** Gets the index of this index expression. */ + Expr getIndex() { result = getChildExpr(1) } + + override predicate mayHaveOwnSideEffects() { any() } + + override string toString() { result = "index expression" } + + override string getAPrimaryQlClass() { result = "IndexExpr" } +} + +/** + * A slice expression, that is, a base expression followed by slice indices. + * + * Examples: + * + * ```go + * a[1:3] + * a[1:3:5] + * ``` + */ +class SliceExpr extends @sliceexpr, Expr { + /** Gets the base of this slice expression. */ + Expr getBase() { result = getChildExpr(0) } + + /** Gets the lower bound of this slice expression. */ + Expr getLow() { result = getChildExpr(1) } + + /** Gets the upper bound of this slice expression. */ + Expr getHigh() { result = getChildExpr(2) } + + /** Gets the maximum of this slice expression, if any. */ + Expr getMax() { result = getChildExpr(3) } + + override string toString() { result = "slice expression" } + + override string getAPrimaryQlClass() { result = "SliceExpr" } +} + +/** + * A type assertion expression. + * + * Examples: + * + * ```go + * x.(T) + * ``` + */ +class TypeAssertExpr extends @typeassertexpr, Expr { + /** Gets the base expression whose type is being asserted. */ + Expr getExpr() { result = getChildExpr(0) } + + /** Gets the expression representing the asserted type. 
*/ + Expr getTypeExpr() { result = getChildExpr(1) } + + override predicate mayHaveOwnSideEffects() { any() } + + override predicate isPlatformIndependentConstant() { getExpr().isPlatformIndependentConstant() } + + override string toString() { result = "type assertion" } + + override string getAPrimaryQlClass() { result = "TypeAssertExpr" } +} + +/** + * An expression that syntactically could either be a function call or a type + * conversion expression. + * + * In most cases, the subclasses `CallExpr` and `ConversionExpr` should be used + * instead. + * + * Examples: + * + * ```go + * f(x) + * g(a, b...) + * []byte("x") + * ``` + */ +class CallOrConversionExpr extends @callorconversionexpr, Expr { + override string getAPrimaryQlClass() { result = "CallOrConversionExpr" } +} + +/** + * A type conversion expression. + * + * Examples: + * + * ```go + * []byte("x") + * ``` + */ +class ConversionExpr extends CallOrConversionExpr { + ConversionExpr() { isTypeExprBottomUp(getChildExpr(0)) } + + /** Gets the type expression representing the target type of the conversion. */ + Expr getTypeExpr() { result = getChildExpr(0) } + + /** Gets the operand of the type conversion. */ + Expr getOperand() { result = getChildExpr(1) } + + override predicate isPlatformIndependentConstant() { + getOperand().isPlatformIndependentConstant() + } + + override string toString() { result = "type conversion" } + + override string getAPrimaryQlClass() { result = "ConversionExpr" } +} + +/** + * A function call expression. + * + * On snapshots with incomplete type information, type conversions may be misclassified + * as function call expressions. + * + * Examples: + * + * ```go + * f(x) + * g(a, b...) 
+ * ``` + */ +class CallExpr extends CallOrConversionExpr { + CallExpr() { + exists(Expr callee | callee = getChildExpr(0) | not isTypeExprBottomUp(callee)) + or + // only calls can have an ellipsis after their last argument + has_ellipsis(this) + } + + /** Gets the expression representing the function being called. */ + Expr getCalleeExpr() { result = getChildExpr(0) } + + /** Gets the `i`th argument expression of this call (0-based). */ + Expr getArgument(int i) { + i >= 0 and + result = getChildExpr(i + 1) + } + + /** Gets an argument expression of this call. */ + Expr getAnArgument() { result = getArgument(_) } + + /** Gets the number of argument expressions of this call. */ + int getNumArgument() { result = count(getAnArgument()) } + + /** Gets the name of the invoked function or method if it can be determined syntactically. */ + string getCalleeName() { + exists(Expr callee | callee = getCalleeExpr().stripParens() | + result = callee.(Ident).getName() + or + result = callee.(SelectorExpr).getSelector().getName() + ) + } + + /** Gets the declared target of this call. */ + Function getTarget() { getCalleeExpr() = result.getAReference() } + + /** Holds if this call has an ellipsis after its last argument. */ + predicate hasEllipsis() { has_ellipsis(this) } + + override predicate mayHaveOwnSideEffects() { + getTarget().mayHaveSideEffects() or + not exists(getTarget()) + } + + override string toString() { + result = "call to " + getCalleeName() + or + not exists(getCalleeName()) and + result = "function call" + } + + override string getAPrimaryQlClass() { result = "CallExpr" } +} + +/** + * A star expression. + * + * Examples: + * + * ```go + * *x + * ``` + */ +class StarExpr extends @starexpr, Expr { + /** Gets the base expression of this star expression. 
*/ + Expr getBase() { result = getChildExpr(0) } + + override predicate mayHaveOwnSideEffects() { any() } + + override string toString() { result = "star expression" } + + override string getAPrimaryQlClass() { result = "StarExpr" } +} + +/** + * A key-value pair in a composite literal. + * + * Examples: + * + * ```go + * "A": 1 + * ``` + */ +class KeyValueExpr extends @keyvalueexpr, Expr { + /** Gets the key expression of this key-value pair. */ + Expr getKey() { result = getChildExpr(0) } + + /** Gets the value expression of this key-value pair. */ + Expr getValue() { result = getChildExpr(1) } + + /** Gets the composite literal to which this key-value pair belongs. */ + CompositeLit getLiteral() { this = result.getElement(_) } + + override string toString() { result = "key-value pair" } + + override string getAPrimaryQlClass() { result = "KeyValueExpr" } +} + +/** + * An expression representing an array type. + * + * Examples: + * + * ```go + * [5]int + * ``` + */ +class ArrayTypeExpr extends @arraytypeexpr, TypeExpr { + /** Gets the length expression of this array type. */ + Expr getLength() { result = getChildExpr(0) } + + /** Gets the expression representing the element type of this array type. */ + Expr getElement() { result = getChildExpr(1) } + + override string toString() { result = "array type" } + + override string getAPrimaryQlClass() { result = "ArrayTypeExpr" } +} + +/** + * An expression representing a struct type. + * + * Examples: + * + * ```go + * struct {x, y int; z float32} + * ``` + */ +class StructTypeExpr extends @structtypeexpr, TypeExpr, FieldParent { + override string toString() { result = "struct type" } + + override string getAPrimaryQlClass() { result = "StructTypeExpr" } +} + +/** + * An expression representing a function type. 
+ * + * Examples: + * + * ```go + * func(a, b int, c float32) (float32, bool) + * ``` + */ +class FuncTypeExpr extends @functypeexpr, TypeExpr, ScopeNode, FieldParent { + /** Gets the `i`th parameter of this function type (0-based). */ + ParameterDecl getParameterDecl(int i) { result = getField(i) and i >= 0 } + + /** Gets a parameter of this function type. */ + ParameterDecl getAParameterDecl() { result = getParameterDecl(_) } + + /** Gets the number of parameters of this function type. */ + int getNumParameter() { result = count(getAParameterDecl()) } + + /** Gets the `i`th result of this function type (0-based). */ + ResultVariableDecl getResultDecl(int i) { result = getField(-(i + 1)) } + + /** Gets a result of this function type. */ + ResultVariableDecl getAResultDecl() { result = getResultDecl(_) } + + /** Gets the number of results of this function type. */ + int getNumResult() { result = count(getAResultDecl()) } + + /** Gets the result of this function type, if there is only one. */ + ResultVariableDecl getResultDecl() { getNumResult() = 1 and result = getAResultDecl() } + + override string toString() { result = "function type" } + + override string getAPrimaryQlClass() { result = "FuncTypeExpr" } + + /** Gets the `i`th child of this node, parameters first followed by results. */ + override AstNode getUniquelyNumberedChild(int i) { + if i < getNumParameter() + then result = getParameterDecl(i) + else result = getResultDecl(i - getNumParameter()) + } +} + +/** + * An expression representing an interface type. + * + * Examples: + * + * ```go + * interface { Read(p []byte) (n int, err error); Close() error} + * ``` + */ +class InterfaceTypeExpr extends @interfacetypeexpr, TypeExpr, FieldParent { + /** Gets the `i`th method specification of this interface type. */ + MethodSpec getMethod(int i) { result = getField(i) } + + /** Gets a method of this interface type. 
*/ + MethodSpec getAMethod() { result = getMethod(_) } + + /** Gets the number of methods of this interface type. */ + int getNumMethod() { result = count(getAMethod()) } + + override string toString() { result = "interface type" } + + override string getAPrimaryQlClass() { result = "InterfaceTypeExpr" } +} + +/** + * An expression representing a map type. + * + * Examples: + * + * ```go + * map[string]int + * ``` + */ +class MapTypeExpr extends @maptypeexpr, TypeExpr { + /** Gets the expression representing the key type of this map type. */ + Expr getKeyTypeExpr() { result = getChildExpr(0) } + + /** Gets the key type of this map type. */ + Type getKeyType() { result = getKeyTypeExpr().getType() } + + /** Gets the expression representing the value type of this map type. */ + Expr getValueTypeExpr() { result = getChildExpr(1) } + + /** Gets the value type of this map type. */ + Type getValueType() { result = getValueTypeExpr().getType() } + + override string toString() { result = "map type" } + + override string getAPrimaryQlClass() { result = "MapTypeExpr" } +} + +/** + * An expression with a (unary or binary) operator. + * + * Examples: + * + * ```go + * a * b + * -c + * ``` + */ +class OperatorExpr extends @operatorexpr, Expr { + /** Gets the operator of this expression. */ + string getOperator() { none() } + + /** Gets an operand of this expression. */ + Expr getAnOperand() { none() } +} + +/** + * An expression with an arithmetic operator like `-` or `/`. + * + * Examples: + * + * ```go + * x - y + * u / v + * ``` + */ +class ArithmeticExpr extends @arithmeticexpr, OperatorExpr { } + +/** + * An expression with a logical operator like `!` or `&&`. + * + * Examples: + * + * ```go + * !a + * b && c + * ``` + */ +class LogicalExpr extends @logicalexpr, OperatorExpr { } + +/** + * An expression with a bitwise operator such as `^` or `|`. 
+ * + * Examples: + * + * ```go + * x ^ y + * a | b + * ``` + */ +class BitwiseExpr extends @bitwiseexpr, OperatorExpr { } + +/** + * An expression with a unary operator. + * + * Examples: + * + * ```go + * +7 + * -2.5i + * !x + * ``` + */ +class UnaryExpr extends @unaryexpr, OperatorExpr { + /** Gets the operand of this unary expression. */ + Expr getOperand() { result = getChildExpr(0) } + + override Expr getAnOperand() { result = this.getOperand() } + + override predicate isPlatformIndependentConstant() { + getOperand().isPlatformIndependentConstant() + } + + override string toString() { result = getOperator() + "..." } +} + +/** + * An expression with a unary arithmetic operator, that is, unary `-` or `+`. + * + * Examples: + * + * ```go + * +7 + * -2.5i + * ``` + */ +class ArithmeticUnaryExpr extends @arithmeticunaryexpr, ArithmeticExpr, UnaryExpr { } + +/** + * An expression with a unary logical operator, that is, `!`. + * + * Examples: + * + * ```go + * !x + * ``` + */ +class LogicalUnaryExpr extends @logicalunaryexpr, LogicalExpr, UnaryExpr { } + +/** + * An expression with a unary bitwise operator, that is, `^`. + * + * Examples: + * + * ```go + * ^x + * ``` + */ +class BitwiseUnaryExpr extends @bitwiseunaryexpr, BitwiseExpr, UnaryExpr { } + +/** + * A unary plus expression using `+`. + * + * Examples: + * + * ```go + * +7 + * ``` + */ +class PlusExpr extends @plusexpr, ArithmeticUnaryExpr { + override string getOperator() { result = "+" } + + override string getAPrimaryQlClass() { result = "PlusExpr" } +} + +/** + * A unary minus expression using `-`. + * + * Examples: + * + * ```go + * -2.5i + * ``` + */ +class MinusExpr extends @minusexpr, ArithmeticUnaryExpr { + override string getOperator() { result = "-" } + + override string getAPrimaryQlClass() { result = "MinusExpr" } +} + +/** + * A unary "not" expression using `!`. 
+ * + * Examples: + * + * ```go + * !x + * ``` + */ +class NotExpr extends @notexpr, LogicalUnaryExpr { + override string getOperator() { result = "!" } + + override string getAPrimaryQlClass() { result = "NotExpr" } +} + +/** + * A unary complement expression using `^`. + * + * Examples: + * + * ```go + * ^x + * ``` + */ +class ComplementExpr extends @complementexpr, BitwiseUnaryExpr { + override string getOperator() { result = "^" } + + override string getAPrimaryQlClass() { result = "ComplementExpr" } +} + +/** + * A unary pointer-dereference expression. + * + * This class exists for compatibility reasons only and should not normally be used directly. Use `StarExpr` instead. + */ +class DerefExpr extends @derefexpr, UnaryExpr { + override predicate mayHaveOwnSideEffects() { any() } + + override string getOperator() { result = "*" } + + override string getAPrimaryQlClass() { result = "DerefExpr" } +} + +/** + * A unary address-of expression using `&`. + * + * Examples: + * + * ```go + * &x + * ``` + */ +class AddressExpr extends @addressexpr, UnaryExpr { + override predicate mayHaveOwnSideEffects() { any() } + + override string getOperator() { result = "&" } + + override string getAPrimaryQlClass() { result = "AddressExpr" } +} + +/** + * A unary receive expression using `<-`. + * + * Examples: + * + * ```go + * <-ch + * ``` + */ +class RecvExpr extends @arrowexpr, UnaryExpr { + override predicate mayHaveOwnSideEffects() { any() } + + override string getOperator() { result = "<-" } + + override string getAPrimaryQlClass() { result = "RecvExpr" } +} + +/** + * A binary expression. + * + * Examples: + * + * ```go + * a * b + * a || b + * b != c + * ``` + */ +class BinaryExpr extends @binaryexpr, OperatorExpr { + /** Gets the left operand of this binary expression. */ + Expr getLeftOperand() { result = getChildExpr(0) } + + /** Gets the right operand of this binary expression. 
*/ + Expr getRightOperand() { result = getChildExpr(1) } + + override Expr getAnOperand() { result = getChildExpr([0 .. 1]) } + + /** Holds if `e` and `f` (in either order) are the two operands of this binary expression. */ + predicate hasOperands(Expr e, Expr f) { + e = getAnOperand() and + f = getAnOperand() and + e != f + } + + override predicate isPlatformIndependentConstant() { + getLeftOperand().isPlatformIndependentConstant() and + getRightOperand().isPlatformIndependentConstant() + } + + override string toString() { result = "..." + getOperator() + "..." } +} + +/** + * A binary arithmetic expression, that is, `+`, `-`, `*`, `/` or `%`. + * + * Examples: + * + * ```go + * a * b + * ``` + */ +class ArithmeticBinaryExpr extends @arithmeticbinaryexpr, ArithmeticExpr, BinaryExpr { } + +/** + * A binary logical expression, that is, `&&` or `||`. + * + * Examples: + * + * ```go + * a || b + * ``` + */ +class LogicalBinaryExpr extends @logicalbinaryexpr, LogicalExpr, BinaryExpr { } + +/** + * A binary bitwise expression, that is, `<<`, `>>`, `|`, `^`, `&` or `&^`. + * + * Examples: + * + * ```go + * a << i + * b ^ c + * ``` + */ +class BitwiseBinaryExpr extends @bitwisebinaryexpr, BitwiseExpr, BinaryExpr { } + +/** + * A shift expression, that is, `<<` or `>>`. + * + * Examples: + * + * ```go + * a << i + * ``` + */ +class ShiftExpr extends @shiftexpr, BitwiseBinaryExpr { } + +/** + * A comparison expression, that is, `==`, `!=`, `<`, `<=`, `>=` or `>`. + * + * Examples: + * + * ```go + * a != b + * c > d + * ``` + */ +class ComparisonExpr extends @comparison, BinaryExpr { } + +/** + * An equality test, that is, `==` or `!=`. + * + * Examples: + * + * ```go + * a != b + * ``` + */ +class EqualityTestExpr extends @equalitytest, ComparisonExpr { + /** Gets the polarity of this equality test, that is, `true` for `==` and `false` for `!=`. */ + boolean getPolarity() { none() } +} + +/** + * A relational comparison, that is, `<`, `<=`, `>=` or `>`. 
+ * + * Examples: + * + * ```go + * c > d + * ``` + */ +class RelationalComparisonExpr extends @relationalcomparison, ComparisonExpr { + /** Holds if this comparison is strict, that is, it implies inequality. */ + predicate isStrict() { none() } + + /** + * Gets the greater operand of this comparison, that is, the right operand for + * a `<` or `<=` comparison, and the left operand for `>=` or `>`. + */ + Expr getGreaterOperand() { none() } + + /** + * Gets the lesser operand of this comparison, that is, the left operand for + * a `<` or `<=` comparison, and the right operand for `>=` or `>`. + */ + Expr getLesserOperand() { none() } +} + +/** + * A logical-or expression using `||`. + * + * Examples: + * + * ```go + * a || b + * ``` + */ +class LorExpr extends @lorexpr, LogicalBinaryExpr { + override string getOperator() { result = "||" } + + override string getAPrimaryQlClass() { result = "LorExpr" } +} + +class LogOrExpr = LorExpr; + +/** + * A logical-and expression using `&&`. + * + * Examples: + * + * ```go + * a && b + * ``` + */ +class LandExpr extends @landexpr, LogicalBinaryExpr { + override string getOperator() { result = "&&" } + + override string getAPrimaryQlClass() { result = "LandExpr" } +} + +class LogAndExpr = LandExpr; + +/** + * An equality test using `==`. + * + * Examples: + * + * ```go + * a == b + * ``` + */ +class EqlExpr extends @eqlexpr, EqualityTestExpr { + override string getOperator() { result = "==" } + + override boolean getPolarity() { result = true } + + override string getAPrimaryQlClass() { result = "EqlExpr" } +} + +class EqExpr = EqlExpr; + +/** + * An inequality test using `!=`. + * + * Examples: + * + * ```go + * a != b + * ``` + */ +class NeqExpr extends @neqexpr, EqualityTestExpr { + override string getOperator() { result = "!=" } + + override boolean getPolarity() { result = false } + + override string getAPrimaryQlClass() { result = "NeqExpr" } +} + +/** + * A less-than test using `<`. 
+ * + * Examples: + * + * ```go + * a < b + * ``` + */ +class LssExpr extends @lssexpr, RelationalComparisonExpr { + override string getOperator() { result = "<" } + + override predicate isStrict() { any() } + + override Expr getLesserOperand() { result = getLeftOperand() } + + override Expr getGreaterOperand() { result = getRightOperand() } + + override string getAPrimaryQlClass() { result = "LssExpr" } +} + +class LTExpr = LssExpr; + +/** + * A less-than-or-equal test using `<=`. + * + * Examples: + * + * ```go + * a <= b + * ``` + */ +class LeqExpr extends @leqexpr, RelationalComparisonExpr { + override string getOperator() { result = "<=" } + + override Expr getLesserOperand() { result = getLeftOperand() } + + override Expr getGreaterOperand() { result = getRightOperand() } + + override string getAPrimaryQlClass() { result = "LeqExpr" } +} + +class LEExpr = LeqExpr; + +/** + * A greater-than test using `>`. + * + * Examples: + * + * ```go + * a > b + * ``` + */ +class GtrExpr extends @gtrexpr, RelationalComparisonExpr { + override string getOperator() { result = ">" } + + override predicate isStrict() { any() } + + override Expr getLesserOperand() { result = getRightOperand() } + + override Expr getGreaterOperand() { result = getLeftOperand() } + + override string getAPrimaryQlClass() { result = "GtrExpr" } +} + +class GTExpr = GtrExpr; + +/** + * A greater-than-or-equal test using `>=`. + * + * Examples: + * + * ```go + * a >= b + * ``` + */ +class GeqExpr extends @geqexpr, RelationalComparisonExpr { + override string getOperator() { result = ">=" } + + override Expr getLesserOperand() { result = getRightOperand() } + + override Expr getGreaterOperand() { result = getLeftOperand() } + + override string getAPrimaryQlClass() { result = "GeqExpr" } +} + +class GEExpr = GeqExpr; + +/** + * An addition expression using `+`. 
+ * + * Examples: + * + * ```go + * a + b + * ``` + */ +class AddExpr extends @addexpr, ArithmeticBinaryExpr { + override string getOperator() { result = "+" } + + override string getAPrimaryQlClass() { result = "AddExpr" } +} + +/** + * A subtraction expression using `-`. + * + * Examples: + * + * ```go + * a - b + * ``` + */ +class SubExpr extends @subexpr, ArithmeticBinaryExpr { + override string getOperator() { result = "-" } + + override string getAPrimaryQlClass() { result = "SubExpr" } +} + +/** + * A bitwise or expression using `|`. + * + * Examples: + * + * ```go + * a | b + * ``` + */ +class OrExpr extends @orexpr, BitwiseBinaryExpr { + override string getOperator() { result = "|" } + + override string getAPrimaryQlClass() { result = "OrExpr" } +} + +class BitOrExpr = OrExpr; + +/** + * An exclusive-or expression using `^`. + * + * Examples: + * + * ```go + * a ^ b + * ``` + */ +class XorExpr extends @xorexpr, BitwiseBinaryExpr { + override string getOperator() { result = "^" } + + override string getAPrimaryQlClass() { result = "XorExpr" } +} + +/** + * A multiplication expression using `*`. + * + * Examples: + * + * ```go + * a * b + * ``` + */ +class MulExpr extends @mulexpr, ArithmeticBinaryExpr { + override string getOperator() { result = "*" } + + override string getAPrimaryQlClass() { result = "MulExpr" } +} + +/** + * A division or quotient expression using `/`. + * + * Examples: + * + * ```go + * a / b + * ``` + */ +class QuoExpr extends @quoexpr, ArithmeticBinaryExpr { + override predicate mayHaveOwnSideEffects() { any() } + + override string getOperator() { result = "/" } + + override string getAPrimaryQlClass() { result = "QuoExpr" } +} + +class DivExpr = QuoExpr; + +/** + * A remainder or modulo expression using `%`. 
+ * + * Examples: + * + * ```go + * a % b + * ``` + */ +class RemExpr extends @remexpr, ArithmeticBinaryExpr { + override string getOperator() { result = "%" } + + override string getAPrimaryQlClass() { result = "RemExpr" } +} + +class ModExpr = RemExpr; + +/** + * A left-shift expression using `<<`. + * + * Examples: + * + * ```go + * a << i + * ``` + */ +class ShlExpr extends @shlexpr, ShiftExpr { + override string getOperator() { result = "<<" } + + override string getAPrimaryQlClass() { result = "ShlExpr" } +} + +class LShiftExpr = ShlExpr; + +/** + * A right-shift expression using `>>`. + * + * Examples: + * + * ```go + * a >> i + * ``` + */ +class ShrExpr extends @shrexpr, ShiftExpr { + override string getOperator() { result = ">>" } + + override string getAPrimaryQlClass() { result = "ShrExpr" } +} + +class RShiftExpr = ShrExpr; + +/** + * A bitwise and-expression using `&`. + * + * Examples: + * + * ```go + * a & b + * ``` + */ +class AndExpr extends @andexpr, BitwiseBinaryExpr { + override string getOperator() { result = "&" } + + override string getAPrimaryQlClass() { result = "AndExpr" } +} + +class BitAndExpr = AndExpr; + +/** + * A bitwise and-not expression using `&^`. + * + * Examples: + * + * ```go + * a &^ b + * ``` + */ +class AndNotExpr extends @andnotexpr, BitwiseBinaryExpr { + override string getOperator() { result = "&^" } + + override string getAPrimaryQlClass() { result = "AndNotExpr" } +} + +/** + * An expression representing a channel type. + * + * Examples: + * + * ```go + * chan float64 + * chan<- bool + * <-chan int + * ``` + */ +class ChanTypeExpr extends @chantypeexpr, TypeExpr { + /** + * Gets the expression representing the type of values flowing through the channel. + */ + Expr getValueTypeExpr() { result = getChildExpr(0) } + + /** Holds if this channel can send data. */ + predicate canSend() { none() } + + /** Holds if this channel can receive data. 
*/ + predicate canReceive() { none() } + + override string toString() { result = "channel type" } + + override string getAPrimaryQlClass() { result = "ChanTypeExpr" } +} + +/** + * An expression representing a send-only channel type. + * + * Examples: + * + * ```go + * chan<- bool + * ``` + */ +class SendChanTypeExpr extends @sendchantypeexpr, ChanTypeExpr { + override predicate canSend() { any() } + + override string getAPrimaryQlClass() { result = "SendChanTypeExpr" } +} + +/** + * An expression representing a receive-only channel type. + * + * Examples: + * + * ```go + * <-chan int + * ``` + */ +class RecvChanTypeExpr extends @recvchantypeexpr, ChanTypeExpr { + override predicate canReceive() { any() } + + override string getAPrimaryQlClass() { result = "RecvChanTypeExpr" } +} + +/** + * An expression representing a duplex channel type that can both send and receive data. + * + * Examples: + * + * ```go + * chan float64 + * ``` + */ +class SendRecvChanTypeExpr extends @sendrcvchantypeexpr, ChanTypeExpr { + override predicate canSend() { any() } + + override predicate canReceive() { any() } + + override string getAPrimaryQlClass() { result = "SendRecvChanTypeExpr" } +} + +/** + * A (possibly qualified) name referring to a package, type, constant, variable, function or label. + * + * Examples: + * + * ```go + * Println + * fmt.Println + * fmt + * int + * T + * x + * Outerloop + * ``` + */ +class Name extends Expr { + Entity target; + + Name() { this.(Ident).refersTo(target) or this.(SelectorExpr).refersTo(target) } + + /** Gets the entity this name refers to. */ + Entity getTarget() { result = target } +} + +/** + * A simple (that is, unqualified) name. + * + * Examples: + * + * ```go + * Println + * ``` + */ +class SimpleName extends Name, Ident { } + +/** + * A qualified name. + * + * Examples: + * + * ```go + * fmt.Println + * ``` + */ +class QualifiedName extends Name, SelectorExpr { } + +/** + * A name referring to an imported package. 
+ * + * Examples: + * + * ```go + * fmt + * ``` + */ +class PackageName extends Name { + override PackageEntity target; + + /** Gets the package this name refers to. */ + override PackageEntity getTarget() { result = target } + + override string getAPrimaryQlClass() { result = "PackageName" } +} + +/** + * A name referring to a type. + * + * Examples: + * + * ```go + * int + * T + * ``` + */ +class TypeName extends Name { + override TypeEntity target; + + /** Gets the type this name refers to. */ + override TypeEntity getTarget() { result = target } + + override string getAPrimaryQlClass() { result = "TypeName" } +} + +/** + * A name referring to a value, that is, a constant, variable or function. + * + * Examples: + * + * ```go + * c + * f + * x + * ``` + */ +class ValueName extends Name { + override ValueEntity target; + + /** Gets the constant, variable or function this name refers to. */ + override ValueEntity getTarget() { result = target } + + override string getAPrimaryQlClass() { result = "ValueName" } +} + +/** + * A name referring to a constant. + * + * Examples: + * + * ```go + * c + * ``` + */ +class ConstantName extends ValueName { + override Constant target; + + /** Gets the constant this name refers to. */ + override Constant getTarget() { result = target } + + override predicate isPlatformIndependentConstant() { + target = Builtin::bool(_) + or + target = Builtin::iota() + or + target = Builtin::nil() + or + exists(DeclaredConstant c | c = target | + not c.getSpec().getFile().hasBuildConstraints() and + c.getInit().isPlatformIndependentConstant() + ) + } + + override string getAPrimaryQlClass() { result = "ConstantName" } +} + +/** + * A name referring to a variable. + * + * Examples: + * + * ```go + * x + * ``` + */ +class VariableName extends ValueName { + override Variable target; + + /** Gets the variable this name refers to. 
*/ + override Variable getTarget() { result = target } + + override string getAPrimaryQlClass() { result = "VariableName" } +} + +/** + * A name referring to a function. + * + * Examples: + * + * ```go + * f + * ``` + */ +class FunctionName extends ValueName { + override Function target; + + /** Gets the function this name refers to. */ + override Function getTarget() { result = target } + + override string getAPrimaryQlClass() { result = "FunctionName" } +} + +/** + * A name referring to a statement label. + * + * Examples: + * + * ```go + * Outerloop + * ``` + */ +class LabelName extends Name { + override Label target; + + /** Gets the label this name refers to. */ + override Label getTarget() { result = target } + + override string getAPrimaryQlClass() { result = "LabelName" } +} + +/** + * Holds if `e` is a type expression, as determined by a bottom-up syntactic + * analysis starting with `TypeName`s. + * + * On a snapshot with full type information, this predicate covers all type + * expressions. However, if type information is missing then not all type names + * may be identified as such, so not all type expressions can be determined by + * a bottom-up analysis. In such cases, `isTypeExprTopDown` below is useful. + */ +private predicate isTypeExprBottomUp(Expr e) { + e instanceof TypeName or + e instanceof @arraytypeexpr or + e instanceof @structtypeexpr or + e instanceof @functypeexpr or + e instanceof @interfacetypeexpr or + e instanceof @maptypeexpr or + e instanceof @chantypeexpr or + isTypeExprBottomUp(e.(ParenExpr).getExpr()) or + isTypeExprBottomUp(e.(StarExpr).getBase()) or + isTypeExprBottomUp(e.(Ellipsis).getOperand()) +} + +/** + * Holds if `e` must be a type expression because it either occurs in a syntactic + * position where a type is expected, or it is part of a larger type expression. + * + * This predicate is only needed on snapshots for which type information is + * incomplete. 
It is an underapproximation; in cases where it is syntactically ambiguous + * whether an expression refers to a type or a value, we conservatively assume that + * it may be the latter and so this predicate does not consider the expression to be + * a type expression. + */ +private predicate isTypeExprTopDown(Expr e) { + e = any(CompositeLit cl).getTypeExpr() + or + e = any(TypeAssertExpr ta).getTypeExpr() + or + e = any(ArrayTypeExpr ae).getElement() + or + e = any(FieldDecl f).getTypeExpr() + or + e = any(ParameterDecl pd).getTypeExpr() + or + e = any(ReceiverDecl rd).getTypeExpr() + or + e = any(ResultVariableDecl rvd).getTypeExpr() + or + e = any(MethodSpec md).getTypeExpr() + or + e = any(MapTypeExpr mt).getKeyTypeExpr() + or + e = any(MapTypeExpr mt).getValueTypeExpr() + or + e = any(ChanTypeExpr ct).getValueTypeExpr() + or + e = any(ValueSpec s).getTypeExpr() + or + e = any(TypeSpec s).getTypeExpr() + or + e = any(TypeSwitchStmt s).getACase().getExpr(_) and + // special case: `nil` is allowed in a type case but isn't a type + not e = Builtin::nil().getAReference() + or + e = any(SelectorExpr sel | isTypeExprTopDown(sel)).getBase() + or + e = any(ParenExpr pe | isTypeExprTopDown(pe)).getExpr() + or + e = any(StarExpr se | isTypeExprTopDown(se)).getBase() + or + e = any(Ellipsis ell | isTypeExprTopDown(ell)).getOperand() +} + +/** + * An expression referring to a type. + * + * Examples: + * + * ```go + * int + * func + * ``` + */ +class TypeExpr extends Expr { + TypeExpr() { + isTypeExprBottomUp(this) or + isTypeExprTopDown(this) + } +} + +/** + * An expression referring to a memory location. 
+ * + * Examples: + * + * ```go + * a[i] + * *p + * ``` + */ +class ReferenceExpr extends Expr { + ReferenceExpr() { + (this instanceof Ident or this instanceof SelectorExpr) and + not (this instanceof PackageName or this instanceof TypeName or this instanceof LabelName) and + not this instanceof TypeExpr and + not this = any(ImportSpec is).getNameExpr() and + not this = any(File f).getPackageNameExpr() and + not this = any(LabeledStmt ls).getLabelExpr() and + not this = any(BranchStmt bs).getLabelExpr() and + not this = any(FieldDecl f).getNameExpr(_) and + not this = any(ParameterDecl pd).getNameExpr(_) and + not this = any(ReceiverDecl rd).getNameExpr() and + not this = any(ResultVariableDecl rvd).getNameExpr(_) and + not this = any(MethodSpec md).getNameExpr() and + not this = any(StructLit sl).getKey(_) + or + this.(ParenExpr).getExpr() instanceof ReferenceExpr + or + this.(StarExpr).getBase() instanceof ReferenceExpr + or + this instanceof DerefExpr + or + this instanceof IndexExpr + } + + /** Holds if this reference expression occurs in a position where it is being assigned to. */ + predicate isLvalue() { + this = any(Assignment assgn).getLhs(_) + or + this = any(IncDecStmt ids).getOperand() + or + exists(RangeStmt rs | + this = rs.getKey() or + this = rs.getValue() + ) + or + exists(ValueSpec spec, int i | this = spec.getNameExpr(i)) + or + exists(FuncDecl fd | this = fd.getNameExpr()) + } + + /** Holds if this reference expression occurs in a position where it is evaluated to a value. */ + predicate isRvalue() { + not this.isLvalue() + or + this = any(CompoundAssignStmt cmp).getLhs(_) + or + this = any(IncDecStmt ids).getOperand() + } +} + +/** + * An expression that refers to a value (as opposed to a package, a type or a statement label). 
+ * + * Examples: + * + * ```go + * x + y + * f(x) + * ``` + */ +class ValueExpr extends Expr { + ValueExpr() { + this.(ReferenceExpr).isRvalue() or + this instanceof BasicLit or + this instanceof FuncLit or + this instanceof CompositeLit or + this.(ParenExpr).getExpr() instanceof ValueExpr or + this instanceof SliceExpr or + this instanceof TypeAssertExpr or + this instanceof CallOrConversionExpr or + this.(StarExpr).getBase() instanceof ValueExpr or + this instanceof OperatorExpr + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/Files.qll b/repo-tests/codeql-go/ql/lib/semmle/go/Files.qll new file mode 100644 index 00000000000..28cb395beb3 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/Files.qll @@ -0,0 +1,278 @@ +/** Provides classes for working with files and folders. */ + +import go + +/** A file or folder. */ +abstract class Container extends @container { + /** + * Gets the absolute, canonical path of this container, using forward slashes + * as path separator. + * + * The path starts with a _root prefix_ followed by zero or more _path + * segments_ separated by forward slashes. + * + * The root prefix is of one of the following forms: + * + * 1. A single forward slash `/` (Unix-style) + * 2. An upper-case drive letter followed by a colon and a forward slash, + * such as `C:/` (Windows-style) + * 3. Two forward slashes, a computer name, and then another forward slash, + * such as `//FileServer/` (UNC-style) + * + * Path segments are never empty (that is, absolute paths never contain two + * contiguous slashes, except as part of a UNC-style root prefix). Also, path + * segments never contain forward slashes, and no path segment is of the + * form `.` (one dot) or `..` (two dots). + * + * Note that an absolute path never ends with a forward slash, except if it is + * a bare root prefix, that is, the path has no path segments. A container + * whose absolute path has no segments is always a `Folder`, not a `File`. 
+ */ + abstract string getAbsolutePath(); + + /** + * Gets a URL representing the location of this container. + * + * For more information see https://lgtm.com/help/ql/locations#providing-urls. + */ + abstract string getURL(); + + /** + * Gets the relative path of this file or folder from the root folder of the + * analyzed source location. The relative path of the root folder itself is + * the empty string. + * + * This has no result if the container is outside the source root, that is, + * if the root folder is not a reflexive, transitive parent of this container. + */ + string getRelativePath() { + exists(string absPath, string pref | + absPath = getAbsolutePath() and sourceLocationPrefix(pref) + | + absPath = pref and result = "" + or + absPath = pref.regexpReplaceAll("/$", "") + "/" + result and + not result.matches("/%") + ) + } + + /** + * Gets the base name of this container including extension, that is, the last + * segment of its absolute path, or the empty string if it has no segments. + * + * Here are some examples of absolute paths and the corresponding base names + * (surrounded with quotes to avoid ambiguity): + * + * + * + * + * + * + * + * + * + *
Absolute path | Base name
"/tmp/tst.go" | "tst.go"
"C:/Program Files (x86)" | "Program Files (x86)"
"/" | ""
"C:/" | ""
"D:/" | ""
"//FileServer/" | ""
+ */ + string getBaseName() { + result = getAbsolutePath().regexpCapture(".*/(([^/]*?)(?:\\.([^.]*))?)", 1) + } + + /** + * Gets the extension of this container, that is, the suffix of its base name + * after the last dot character, if any. + * + * In particular, + * + * - if the name does not include a dot, there is no extension, so this + * predicate has no result; + * - if the name ends in a dot, the extension is the empty string; + * - if the name contains multiple dots, the extension follows the last dot. + * + * Here are some examples of absolute paths and the corresponding extensions + * (surrounded with quotes to avoid ambiguity): + * + * + * + * + * + * + * + * + *
Absolute path | Extension
"/tmp/tst.go" | "go"
"/tmp/.classpath" | "classpath"
"/bin/bash" | not defined
"/tmp/tst2." | ""
"/tmp/x.tar.gz" | "gz"
+ */ + string getExtension() { result = getAbsolutePath().regexpCapture(".*/([^/]*?)(\\.([^.]*))?", 3) } + + /** + * Gets the stem of this container, that is, the prefix of its base name up to + * (but not including) the last dot character if there is one, or the entire + * base name if there is not. + * + * Here are some examples of absolute paths and the corresponding stems + * (surrounded with quotes to avoid ambiguity): + * + * + * + * + * + * + * + * + *
Absolute path | Stem
"/tmp/tst.go" | "tst"
"/tmp/.classpath" | ""
"/bin/bash" | "bash"
"/tmp/tst2." | "tst2"
"/tmp/x.tar.gz" | "x.tar"
+ */ + string getStem() { result = getAbsolutePath().regexpCapture(".*/([^/]*?)(?:\\.([^.]*))?", 1) } + + /** Gets the parent container of this file or folder, if any. */ + Container getParentContainer() { containerparent(result, this) } + + /** Gets a file or sub-folder in this container. */ + Container getAChildContainer() { this = result.getParentContainer() } + + /** Gets a file in this container. */ + File getAFile() { result = getAChildContainer() } + + /** Gets the file in this container that has the given `baseName`, if any. */ + File getFile(string baseName) { + result = getAFile() and + result.getBaseName() = baseName + } + + /** Gets a sub-folder in this container. */ + Folder getAFolder() { result = getAChildContainer() } + + /** Gets the sub-folder in this container that has the given `baseName`, if any. */ + Folder getFolder(string baseName) { + result = getAFolder() and + result.getBaseName() = baseName + } + + /** + * Gets a textual representation of the path of this container. + * + * This is the absolute path of the container. + */ + string toString() { result = getAbsolutePath() } +} + +/** A folder. */ +class Folder extends Container, @folder { + override string getAbsolutePath() { folders(this, result) } + + /** Gets the file or subfolder in this folder that has the given `name`, if any. */ + Container getChildContainer(string name) { + result = getAChildContainer() and + result.getBaseName() = name + } + + /** Gets the file in this folder that has the given `stem` and `extension`, if any. */ + File getFile(string stem, string extension) { + result = getAChildContainer() and + result.getStem() = stem and + result.getExtension() = extension + } + + /** Gets a subfolder contained in this folder. */ + Folder getASubFolder() { result = getAChildContainer() } + + /** Gets the URL of this folder. 
*/ + override string getURL() { result = "folder://" + getAbsolutePath() } +} + +/** Any file, including files that have not been extracted but are referred to as locations for errors. */ +class ExtractedOrExternalFile extends Container, @file, Documentable, ExprParent, GoModExprParent, + DeclParent, ScopeNode { + override Location getLocation() { has_location(this, result) } + + override string getAbsolutePath() { files(this, result) } + + /** Gets the number of lines in this file. */ + int getNumberOfLines() { numlines(this, result, _, _) } + + /** Gets the number of lines containing code in this file. */ + int getNumberOfLinesOfCode() { numlines(this, _, result, _) } + + /** Gets the number of lines containing comments in this file. */ + int getNumberOfLinesOfComments() { numlines(this, _, _, result) } + + /** Gets the package name as specified in the package clause of this file. */ + Ident getPackageNameExpr() { result = getChildExpr(0) } + + /** Gets the name of the package to which this file belongs. */ + string getPackageName() { result = getPackageNameExpr().getName() } + + /** Holds if this file contains at least one build constraint. */ + pragma[noinline] + predicate hasBuildConstraints() { exists(BuildConstraintComment bc | this = bc.getFile()) } + + /** + * Holds if this file contains build constraints that ensure that it + * is only built on architectures of bit size `bitSize`, which can be + * 32 or 64. + */ + predicate constrainsIntBitSize(int bitSize) { + explicitlyConstrainsIntBitSize(bitSize) or + implicitlyConstrainsIntBitSize(bitSize) + } + + /** + * Holds if this file contains explicit build constraints that ensure + * that it is only built on an architecture of bit size `bitSize`, + * which can be 32 or 64. 
+ */ + predicate explicitlyConstrainsIntBitSize(int bitSize) { + exists(BuildConstraintComment bcc | this = bcc.getFile() | + forex(string disjunct | disjunct = bcc.getADisjunct() | + disjunct.splitAt(",").(Architecture).getBitSize() = bitSize + or + disjunct.splitAt("/").(Architecture).getBitSize() = bitSize + ) + ) + } + + /** + * Holds if this file has a name which acts as an implicit build + * constraint that ensures that it is only built on an + * architecture of bit size `bitSize`, which can be 32 or 64. + */ + predicate implicitlyConstrainsIntBitSize(int bitSize) { + exists(Architecture arch | arch.getBitSize() = bitSize | + this.getStem().regexpMatch("(?i).*_\\Q" + arch + "\\E(_test)?") + ) + } + + override string toString() { result = Container.super.toString() } + + /** Gets the URL of this file. */ + override string getURL() { result = "file://" + this.getAbsolutePath() + ":0:0:0:0" } + + /** Gets the `i`th child comment group. */ + CommentGroup getCommentGroup(int i) { comment_groups(result, this, i) } + + /** Gets a child comment group. */ + CommentGroup getACommentGroup() { result = getCommentGroup(_) } + + /** Gets the number of child comment groups of this file. */ + int getNumCommentGroups() { result = count(getACommentGroup()) } + + override string getAPrimaryQlClass() { result = "File" } +} + +/** A file that has been extracted. */ +class File extends ExtractedOrExternalFile { + File() { + // getAChild is specifically for the Go AST and so does not apply to non-go files + // we care about all non-go extracted files, as only go files can have `@file` entries due to requiring a file entry for diagnostic errors + not this.getExtension() = "go" + or + exists(this.getAChild()) + } +} + +/** A Go file. */ +class GoFile extends File { + GoFile() { this.getExtension() = "go" } +} + +/** An HTML file. 
*/ +class HtmlFile extends File { + HtmlFile() { this.getExtension().regexpMatch("x?html?") } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/GoMod.qll b/repo-tests/codeql-go/ql/lib/semmle/go/GoMod.qll new file mode 100644 index 00000000000..119339c18eb --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/GoMod.qll @@ -0,0 +1,231 @@ +/** + * Provides classes for working with go.mod files. + */ + +import go + +/** A go.mod file. */ +class GoModFile extends File { + GoModFile() { this.getBaseName() = "go.mod" } + + /** + * Gets the module declaration of this file, that is, the line declaring the path of this module. + */ + GoModModuleLine getModuleDeclaration() { result.getFile() = this } + + override string getAPrimaryQlClass() { result = "GoModFile" } +} + +/** + * An expression in a go.mod file, which is used to declare dependencies. + */ +class GoModExpr extends @modexpr, GoModExprParent { + /** + * Gets the kind of this expression, which is an integer value representing the expression's + * node type. + * + * Note that the mapping from node types to integer kinds is considered an implementation detail + * and subject to change without notice. + */ + int getKind() { modexprs(this, result, _, _) } + + /** + * Get the comment group associated with this expression. + */ + DocComment getComments() { result.getDocumentedElement() = this } + + override GoModFile getFile() { result = GoModExprParent.super.getFile() } + + /** Gets path of the module of this go.mod expression. */ + string getModulePath() { result = this.getFile().getModuleDeclaration().getPath() } + + override string toString() { result = "go.mod expression" } + + override string getAPrimaryQlClass() { result = "GoModExpr" } +} + +/** + * A top-level block of comments separate from any rule. + */ +class GoModCommentBlock extends @modcommentblock, GoModExpr { + override string getAPrimaryQlClass() { result = "GoModCommentBlock" } +} + +/** + * A single line of tokens. 
+ */ +class GoModLine extends @modline, GoModExpr { + /** + * Gets the `i`th token on this line, 0-based. + * + * Generally, one should use `getToken`, as that accounts for lines inside of line blocks. + */ + string getRawToken(int i) { modtokens(result, this, i) } + + /** + * Gets the `i`th token of this line, including the token in the line block declaration, if there is + * one, 0-based. + * + * This compensates for the fact that lines in line blocks have their 0th token in the line block + * declaration, and makes dealing with lines more uniform. + * + * For example, `.getToken(1)` will result in the dependency path (`github.com/github/codeql-go`) + * both for normal require lines like `require "github.com/github/codeql-go" v1.2.3` and for lines + * in a line block like + * + * ``` + * require ( + * "github.com/github/codeql-go" v1.2.3 + * ... + * ) + * ``` + * + * As a special case, when `i` is `0` and the line is in a line block, the result will be the + * token from the line block. + */ + string getToken(int i) { + i = 0 and result = this.getParent().(GoModLineBlock).getRawToken(0) + or + if this.getParent() instanceof GoModLineBlock + then result = this.getRawToken(i - 1) + else result = this.getRawToken(i) + } + + override string toString() { result = "go.mod line" } + + override string getAPrimaryQlClass() { result = "GoModLine" } +} + +/** + * A factored block of lines, for example: + * ``` + * require ( + * "github.com/github/codeql-go" v1.2.3 + * "golang.org/x/tools" v3.2.1 + * ) + * ``` + */ +class GoModLineBlock extends @modlineblock, GoModExpr { + /** + * Gets the `i`th token of this line block, 0-based. + * + * Usually one should not have to use this, as `GoModLine.getToken(0)` will get the token from its + * parent line block, if any.
+ */ + string getRawToken(int i) { modtokens(result, this, i) } + + override string toString() { result = "go.mod line block" } + + override string getAPrimaryQlClass() { result = "GoModLineBlock" } +} + +/** + * A line that contains the module's package path, for example `module github.com/github/codeql-go`. + */ +class GoModModuleLine extends GoModLine { + GoModModuleLine() { this.getToken(0) = "module" } + + /** + * Get the path of the module being declared. + */ + string getPath() { result = this.getToken(1) } + + override string toString() { result = "go.mod module line" } + + override string getAPrimaryQlClass() { result = "GoModModuleLine" } +} + +/** + * A line that declares the Go version to be used, for example `go 1.14`. + */ +class GoModGoLine extends GoModLine { + GoModGoLine() { this.getToken(0) = "go" } + + /** Gets the Go version declared. */ + string getVersion() { result = this.getToken(1) } + + override string toString() { result = "go.mod go line" } + + override string getAPrimaryQlClass() { result = "GoModGoLine" } +} + +/** + * A line that declares a requirement, for example `require "github.com/github/codeql-go" v1.2.3`. + */ +class GoModRequireLine extends GoModLine { + GoModRequireLine() { this.getToken(0) = "require" } + + /** Gets the path of the dependency. */ + string getPath() { result = this.getToken(1) } + + /** Gets the version of the dependency. */ + string getVersion() { result = this.getToken(2) } + + override string toString() { result = "go.mod require line" } + + override string getAPrimaryQlClass() { result = "GoModRequireLine" } +} + +/** + * A line that declares a dependency version to exclude, for example + * `exclude "github.com/github/codeql-go" v1.2.3`. + */ +class GoModExcludeLine extends GoModLine { + GoModExcludeLine() { this.getToken(0) = "exclude" } + + /** Gets the path of the dependency to exclude a version of. */ + string getPath() { result = this.getToken(1) } + + /** Gets the excluded version. 
*/ + string getVersion() { result = this.getToken(2) } + + override string toString() { result = "go.mod exclude line" } + + override string getAPrimaryQlClass() { result = "GoModExcludeLine" } +} + +/** + * A line that specifies a dependency to use instead of another one, for example + * `replace "golang.org/x/tools" => "github.com/golang/tools" v1.2.3`. + */ +class GoModReplaceLine extends GoModLine { + GoModReplaceLine() { this.getToken(0) = "replace" } + + /** Gets the path of the dependency to be replaced. */ + string getOriginalPath() { result = this.getToken(1) } + + /** Gets the version of the dependency to be replaced, if any. */ + string getOriginalVersion() { result = this.getToken(2) and not result = "=>" } + + /** Gets the path of the replacement dependency. */ + string getReplacementPath() { + if exists(this.getOriginalVersion()) + then result = this.getToken(4) + else result = this.getToken(3) + } + + /** Gets the version of the replacement dependency. */ + string getReplacementVersion() { + if exists(this.getOriginalVersion()) + then result = this.getToken(5) + else result = this.getToken(4) + } + + override string toString() { result = "go.mod replace line" } + + override string getAPrimaryQlClass() { result = "GoModReplaceLine" } +} + +/** A left parenthesis for a line block. */ +class GoModLParen extends @modlparen, GoModExpr { + override string toString() { result = "go.mod (" } + + override string getAPrimaryQlClass() { result = "GoModLParen" } +} + +/** A right parenthesis for a line block.
*/ +class GoModRParen extends @modrparen, GoModExpr { + override string toString() { result = "go.mod )" } + + override string getAPrimaryQlClass() { result = "GoModRParen" } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/HTML.qll b/repo-tests/codeql-go/ql/lib/semmle/go/HTML.qll new file mode 100644 index 00000000000..f4fb773ca8e --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/HTML.qll @@ -0,0 +1,207 @@ +/** Provides classes for working with HTML documents. */ + +import go + +module HTML { + /** + * An HTML element. + * + * Example: + * + * ``` + * Semmle + * ``` + */ + class Element extends Locatable, @xmlelement { + Element() { exists(HtmlFile f | xmlElements(this, _, _, _, f)) } + + override Location getLocation() { xmllocations(this, result) } + + /** + * Gets the name of this HTML element. + * + * For example, the name of `
` is `br`. + */ + string getName() { xmlElements(this, result, _, _, _) } + + /** + * Gets the parent element of this element, if any. + */ + Element getParent() { xmlElements(this, _, result, _, _) } + + /** + * Holds if this is a toplevel element, that is, if it does not have a parent element. + */ + predicate isTopLevel() { not exists(getParent()) } + + /** + * Gets the root HTML document element in which this element is contained. + */ + DocumentElement getDocument() { result = getRoot() } + + /** + * Gets the root element in which this element is contained. + */ + Element getRoot() { if isTopLevel() then result = this else result = getParent().getRoot() } + + /** + * Gets the `i`th child element (0-based) of this element. + */ + Element getChild(int i) { xmlElements(result, _, this, i, _) } + + /** + * Gets a child element of this element. + */ + Element getChild() { result = getChild(_) } + + /** + * Gets the `i`th attribute (0-based) of this element. + */ + Attribute getAttribute(int i) { xmlAttrs(result, this, _, _, i, _) } + + /** + * Gets an attribute of this element. + */ + Attribute getAnAttribute() { result = getAttribute(_) } + + /** + * Gets an attribute of this element that has the given name. + */ + Attribute getAttributeByName(string name) { + result = getAnAttribute() and + result.getName() = name + } + + /** + * Gets the text node associated with this element. + */ + TextNode getTextNode() { result.getParent() = this } + + override string toString() { result = "<" + getName() + ">..." } + } + + /** + * An attribute of an HTML element. + * + * Examples: + * + * ``` + * + * target=_blank + * >Semmle + * ``` + */ + class Attribute extends Locatable, @xmlattribute { + Attribute() { xmlAttrs(this, _, _, _, _, any(HtmlFile f)) } + + override Location getLocation() { xmllocations(this, result) } + + /** + * Gets the element to which this attribute belongs. 
+ */ + Element getElement() { xmlAttrs(this, result, _, _, _, _) } + + /** + * Gets the root element in which the element to which this attribute + * belongs is contained. + */ + Element getRoot() { result = getElement().getRoot() } + + /** + * Gets the name of this attribute. + */ + string getName() { xmlAttrs(this, _, result, _, _, _) } + + /** + * Gets the value of this attribute. + * + * For attributes without an explicitly specified value, the + * result is the empty string. + */ + string getValue() { xmlAttrs(this, _, _, result, _, _) } + + override string toString() { result = getName() + "=" + getValue() } + } + + /** + * An HTML `` element. + * + * Example: + * + * ``` + * + * + * This is a test. + * + * + * ``` + */ + class DocumentElement extends Element { + DocumentElement() { getName() = "html" } + } + + /** + * An HTML text node. + * + * Example: + * + * ``` + *
<div> + * This text is represented as a text node. + * </div>
+ * ``` + */ + class TextNode extends Locatable, @xmlcharacters { + TextNode() { exists(HtmlFile f | xmlChars(this, _, _, _, _, f)) } + + override string toString() { result = getText() } + + /** + * Gets the content of this text node. + * + * Note that entity expansion has been performed already. + */ + string getText() { xmlChars(this, result, _, _, _, _) } + + /** + * Gets the parent element of this text node. + */ + Element getParent() { xmlChars(this, _, result, _, _, _) } + + /** + * Gets the child index number of this text node. + */ + int getIndex() { xmlChars(this, _, _, result, _, _) } + + /** + * Holds if this text node is inside a `CDATA` tag. + */ + predicate isCData() { xmlChars(this, _, _, _, 1, _) } + + override Location getLocation() { xmllocations(this, result) } + } + + /** + * An HTML comment. + * + * Example: + * + * ``` + * <!-- this is a comment --> + * ``` + */ + class CommentNode extends Locatable, @xmlcomment { + CommentNode() { exists(HtmlFile f | xmlComments(this, _, _, f)) } + + /** Gets the element in which this comment occurs. */ + Element getParent() { xmlComments(this, _, result, _) } + + /** Gets the text of this comment, not including delimiters. */ + string getText() { result = toString().regexpCapture("(?s)<!--(.*)-->", 1) } + + override string toString() { xmlComments(this, result, _, _) } + + override Location getLocation() { xmllocations(this, result) } + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/Locations.qll b/repo-tests/codeql-go/ql/lib/semmle/go/Locations.qll new file mode 100644 index 00000000000..4fb69be21c1 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/Locations.qll @@ -0,0 +1,81 @@ +/** Provides classes for working with locations and program elements that have locations. */ + +import go + +/** + * A location as given by a file, a start line, a start column, + * an end line, and an end column. + * + * For more information about locations see [LGTM locations](https://lgtm.com/help/ql/locations).
+ */ +class Location extends @location { + /** Gets the file for this location. */ + File getFile() { locations_default(this, result, _, _, _, _) } + + /** Gets the 1-based line number (inclusive) where this location starts. */ + int getStartLine() { locations_default(this, _, result, _, _, _) } + + /** Gets the 1-based column number (inclusive) where this location starts. */ + int getStartColumn() { locations_default(this, _, _, result, _, _) } + + /** Gets the 1-based line number (inclusive) where this location ends. */ + int getEndLine() { locations_default(this, _, _, _, result, _) } + + /** Gets the 1-based column number (inclusive) where this location ends. */ + int getEndColumn() { locations_default(this, _, _, _, _, result) } + + /** Gets the number of lines covered by this location. */ + int getNumLines() { result = getEndLine() - getStartLine() + 1 } + + /** Gets a textual representation of this element. */ + string toString() { + exists(string filepath, int startline, int startcolumn, int endline, int endcolumn | + hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) and + result = filepath + "@" + startline + ":" + startcolumn + ":" + endline + ":" + endcolumn + ) + } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [LGTM locations](https://lgtm.com/help/ql/locations). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + exists(File f | + locations_default(this, f, startline, startcolumn, endline, endcolumn) and + filepath = f.getAbsolutePath() + ) + } +} + +/** A program element with a location. */ +class Locatable extends @locatable { + /** Gets the file this program element comes from. */ + File getFile() { result = getLocation().getFile() } + + /** Gets this element's location. 
*/ + Location getLocation() { has_location(this, result) } + + /** Gets the number of lines covered by this element. */ + int getNumLines() { result = getLocation().getNumLines() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [LGTM locations](https://lgtm.com/help/ql/locations). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + + /** Gets a textual representation of this element. */ + string toString() { result = "locatable element" } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/Packages.qll b/repo-tests/codeql-go/ql/lib/semmle/go/Packages.qll new file mode 100644 index 00000000000..bc51911da27 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/Packages.qll @@ -0,0 +1,41 @@ +/** + * Provides classes for working with packages. + */ + +import go + +/** + * A package. + */ +class Package extends @package { + /** Gets the name of this package. */ + string getName() { packages(this, result, _, _) } + + /** Gets the path of this package. */ + string getPath() { + exists(string fullPath | packages(this, _, fullPath, _) | + result = fullPath.regexpReplaceAll("^.*/vendor/", "") + ) + } + + /** Gets the scope of this package. */ + PackageScope getScope() { packages(this, _, _, result) } + + /** Gets a textual representation of this element. */ + string toString() { result = "package " + getPath() } +} + +/** + * Gets an import path that identifies a package in module `mod` with the given path, + * possibly modulo [semantic import versioning](https://github.com/golang/go/wiki/Modules#semantic-import-versioning). 
+ * + * For example, `package("github.com/go-pg/pg", "types")` gets an import path that can + * refer to `"github.com/go-pg/pg/types"`, but also to `"github.com/go-pg/pg/v10/types"`. + */ +bindingset[mod, path] +string package(string mod, string path) { + // "\Q" and "\E" start and end a quoted section of a regular expression. Anything like "." or "*" + // that comes between them is not interpreted as it would normally be in a regular expression. + result.regexpMatch("\\Q" + mod + "\\E([/.]v[^/]+)?($|/)\\Q" + path + "\\E") and + result = any(Package p).getPath() +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/PrintAst.ql b/repo-tests/codeql-go/ql/lib/semmle/go/PrintAst.ql new file mode 100644 index 00000000000..23b6b123b06 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/PrintAst.ql @@ -0,0 +1,20 @@ +/** + * @name Print AST + * @description Outputs a representation of the Abstract Syntax Tree. + * @id go/print-ast + * @kind graph + */ + +import go +import PrintAst + +/** + * Hook to customize the functions, files and comments printed by this query; here everything is printed. + */ +class Cfg extends PrintAstConfiguration { + override predicate shouldPrintFunction(FuncDecl func) { any() } + + override predicate shouldPrintFile(File file) { any() } + + override predicate shouldPrintComments(File file) { any() } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/PrintAst.qll b/repo-tests/codeql-go/ql/lib/semmle/go/PrintAst.qll new file mode 100644 index 00000000000..cf28be44f49 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/PrintAst.qll @@ -0,0 +1,271 @@ +/** + * Provides queries to pretty-print a Go AST as a graph. + */ + +import go + +/** + * Hook to customize the files and functions printed by this module. + * + * For an AstNode to be printed, it always requires `shouldPrintFile(f)` to hold + * for its containing file `f`, and additionally requires `shouldPrintFunction(fun)` + * to hold if it is, or is a child of, function `fun`.
+ */ +class PrintAstConfiguration extends string { + /** + * Restrict to a single string, making this a singleton type. + */ + PrintAstConfiguration() { this = "PrintAstConfiguration" } + + /** + * Holds if the AST for `func` should be printed. By default, holds for all + * functions. + */ + predicate shouldPrintFunction(FuncDecl func) { any() } + + /** + * Holds if the AST for `file` should be printed. By default, holds for all + * files. + */ + predicate shouldPrintFile(File file) { any() } + + /** + * Holds if the AST for `file` should include comments. By default, holds for all + * files. + */ + predicate shouldPrintComments(File file) { any() } +} + +private predicate shouldPrintFunction(FuncDef func) { + exists(PrintAstConfiguration config | config.shouldPrintFunction(func)) +} + +private predicate shouldPrintFile(File file) { + exists(PrintAstConfiguration config | config.shouldPrintFile(file)) +} + +private predicate shouldPrintComments(File file) { + exists(PrintAstConfiguration config | config.shouldPrintComments(file)) +} + +private FuncDecl getEnclosingFunctionDecl(AstNode n) { result = n.getParent*() } + +/** + * An AST node that should be printed. + */ +private newtype TPrintAstNode = + TAstNode(AstNode ast) { + shouldPrintFile(ast.getFile()) and + // Do print ast nodes without an enclosing function, e.g. file headers, that are not otherwise excluded + forall(FuncDecl f | f = getEnclosingFunctionDecl(ast) | shouldPrintFunction(f)) and + ( + shouldPrintComments(ast.getFile()) + or + not ast instanceof Comment and not ast instanceof CommentGroup + ) + } + +/** + * A node in the output tree. + */ +class PrintAstNode extends TPrintAstNode { + /** + * Gets a textual representation of this node. + */ + abstract string toString(); + + /** + * Gets the child node at index `childIndex`. Child indices must be unique, + * but need not be contiguous. 
+ */ + abstract PrintAstNode getChild(int childIndex); + + /** + * Holds if this node should be printed in the output. By default, all nodes + * within a function are printed, but the query can override + * `PrintAstConfiguration.shouldPrintFunction` to filter the output. + */ + predicate shouldPrint() { exists(getLocation()) } + + /** + * Gets a child of this node. + */ + PrintAstNode getAChild() { result = getChild(_) } + + /** + * Gets the location of this node in the source code. + */ + abstract Location getLocation(); + + /** + * Gets the value of the property of this node, where the name of the property + * is `key`. + */ + string getProperty(string key) { + key = "semmle.label" and + result = toString() + } + + /** + * Gets the label for the edge from this node to the specified child. By + * default, this is just the index of the child, but subclasses can override + * this. + */ + string getChildEdgeLabel(int childIndex) { + exists(getChild(childIndex)) and + result = childIndex.toString() + } + + /** + * Gets the `FuncDef` that contains this node. + */ + abstract FuncDef getEnclosingFunction(); +} + +/** + * Gets a pretty-printed representation of the QL class(es) for entity `el`. + */ +private string qlClass(AstNode el) { + // This version shows all non-overridden QL classes: + // result = "[" + concat(el.getAQlClass(), ", ") + "] " + // Normally we prefer to show just the canonical class: + result = "[" + concat(el.getAPrimaryQlClass(), ", ") + "] " +} + +/** + * A graph node representing a real AST node. + */ +class BaseAstNode extends PrintAstNode, TAstNode { + AstNode ast; + + BaseAstNode() { this = TAstNode(ast) } + + override BaseAstNode getChild(int childIndex) { + // Note a node can have several results for getChild(n) because some + // nodes have multiple different types of child (e.g. 
a File has a + // child expression, the package name, and child declarations whose + // indices may clash), so we renumber them: + result = TAstNode(ast.getUniquelyNumberedChild(childIndex)) + } + + override string toString() { result = qlClass(ast) + ast } + + final override Location getLocation() { result = ast.getLocation() } + + final override FuncDef getEnclosingFunction() { + result = ast or result = ast.getEnclosingFunction() + } +} + +/** + * A node representing an `Expr`. + */ +class ExprNode extends BaseAstNode { + override Expr ast; + + override string getProperty(string key) { + result = super.getProperty(key) + or + key = "Value" and + result = qlClass(ast) + ast.getExactValue() + or + key = "Type" and + not ast.getType() instanceof InvalidType and + result = ast.getType().pp() + } +} + +/** + * A node representing a `File` + */ +class FileNode extends BaseAstNode { + override File ast; + + private string getRelativePath() { result = ast.getRelativePath() } + + private int getSortOrder() { + rank[result](FileNode fn | any() | fn order by fn.getRelativePath()) = this + } + + override string getProperty(string key) { + result = super.getProperty(key) + or + key = "semmle.order" and + result = getSortOrder().toString() + } + + /** + * Gets a child of this node, renumbering `packageNode`, our parent's + * `oldPackageIndex`th child, as the first child and moving others accordingly. + */ + private BaseAstNode getChildPackageFirst( + int childIndex, BaseAstNode packageNode, int oldPackageIndex + ) { + super.getChild(oldPackageIndex) = packageNode and + ( + childIndex = 0 and result = packageNode + or + result = + rank[childIndex](BaseAstNode node, int i | + node = super.getChild(i) and i != oldPackageIndex + | + node order by i + ) + ) + } + + /** + * Gets a child of this node, moving the package-name expression to the front + * of the list if one exists. 
+ */ + override BaseAstNode getChild(int childIndex) { + if exists(ast.getPackageNameExpr()) + then result = getChildPackageFirst(childIndex, TAstNode(ast.getPackageNameExpr()), _) + else result = super.getChild(childIndex) + } + + /** + * Gets the label for the edge from this node to the specified child. The package name + * expression is named 'package'; others are numbered as per our parent's implementation + * of this method. + */ + override string getChildEdgeLabel(int childIndex) { + if getChild(childIndex) = TAstNode(ast.getPackageNameExpr()) + then result = "package" + else result = super.getChildEdgeLabel(childIndex) + } + + /** + * Gets the string representation of this File. Note explicitly using a relative path + * like this rather than absolute as per default for the File class is a workaround for + * a bug with codeql run test, which should replace absolute paths but currently does not. + */ + override string toString() { result = qlClass(ast) + ast.getRelativePath() } +} + +/** Holds if `node` belongs to the output tree, and its property `key` has the given `value`. */ +query predicate nodes(PrintAstNode node, string key, string value) { + node.shouldPrint() and + value = node.getProperty(key) +} + +/** + * Holds if `target` is a child of `source` in the AST, and property `key` of the edge has the + * given `value`. + */ +query predicate edges(PrintAstNode source, PrintAstNode target, string key, string value) { + exists(int childIndex | + source.shouldPrint() and + target.shouldPrint() and + target = source.getChild(childIndex) + | + key = "semmle.label" and value = source.getChildEdgeLabel(childIndex) + or + key = "semmle.order" and value = childIndex.toString() + ) +} + +/** Holds if property `key` of the graph has the given `value`. 
*/ +query predicate graphProperties(string key, string value) { + key = "semmle.graphKind" and value = "tree" +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/Scopes.qll b/repo-tests/codeql-go/ql/lib/semmle/go/Scopes.qll new file mode 100644 index 00000000000..7347cb919bc --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/Scopes.qll @@ -0,0 +1,755 @@ +/** + * Provides classes for working with scopes and declared objects. + */ + +import go + +/** + * A scope. + */ +class Scope extends @scope { + /** Gets the enclosing scope of this scope, if any. */ + Scope getOuterScope() { scopenesting(this, result) } + + /** Gets a scope nested inside this scope. */ + Scope getAnInnerScope() { this = result.getOuterScope() } + + /** Looks up the entity with the given name in this scope. */ + Entity getEntity(string name) { + result.getName() = name and + result.getScope() = this + } + + /** Gets a textual representation of this scope. */ + string toString() { result = "scope" } +} + +/** Provides helper predicates for working with scopes. */ +module Scope { + /** Gets the universe scope. */ + UniverseScope universe() { any() } +} + +/** + * The universe scope. + */ +class UniverseScope extends @universescope, Scope { + override string toString() { result = "universe scope" } +} + +/** A package scope. */ +class PackageScope extends @packagescope, Scope { + /** Gets the package whose scope this is. */ + Package getPackage() { this = result.getScope() } + + override string toString() { result = "package scope" } +} + +/** A local scope. */ +class LocalScope extends @localscope, Scope, Locatable { + /** Gets the AST node inducing this scope. */ + ScopeNode getNode() { this = result.getScope() } + + /** + * Gets the function scope in which this scope is nested. + * + * For function scopes, this is the scope itself. 
+ */ + FunctionScope getEnclosingFunctionScope() { + result = getOuterScope().(LocalScope).getEnclosingFunctionScope() + } + + override string toString() { result = "local scope" } +} + +/** A local scope induced by a file. */ +class FileScope extends LocalScope { + FileScope() { getNode() instanceof File } +} + +/** A local scope induced by a function definition. */ +class FunctionScope extends LocalScope { + FuncDef f; + + FunctionScope() { getNode() = f.getTypeExpr() } + + /** Gets the function inducing this scope. */ + FuncDef getFunction() { result = f } + + override FunctionScope getEnclosingFunctionScope() { result = this } + + override string toString() { result = "function scope" } +} + +/** + * A declared or built-in entity (that is, package, type, constant, variable, function or label) + */ +class Entity extends @object { + /** + * Gets the name of this entity. + * + * Anonymous entities (such as the receiver variables of interface methods) have the empty string as their name. + */ + string getName() { objects(this, _, result) } + + /** Gets the package in which this entity is declared, if any. */ + Package getPackage() { result.getScope() = this.getScope() } + + /** Holds if this entity is declared in a package with path `pkg` and has the given `name`. */ + predicate hasQualifiedName(string pkg, string name) { + pkg = getPackage().getPath() and + name = getName() + } + + /** Gets the qualified name of this entity, if any. */ + string getQualifiedName() { + exists(string pkg, string name | hasQualifiedName(pkg, name) | result = pkg + "." + name) + } + + /** + * Gets the scope in which this entity is declared, if any. + * + * Entities corresponding to fields and methods do not have a scope. + */ + Scope getScope() { objectscopes(this, result) } + + /** Gets the declaring identifier for this entity. */ + Ident getDeclaration() { result.declares(this) } + + /** Gets a reference to this entity. 
*/ + Name getAReference() { result.getTarget() = this } + + /** Gets the type of this entity. */ + Type getType() { objecttypes(this, result) } + + /** Gets a textual representation of this entity. */ + string toString() { result = getName() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [LGTM locations](https://lgtm.com/help/ql/locations). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + // take the location of the declaration if there is one + getDeclaration().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + or + // otherwise fall back on dummy location + not exists(getDeclaration()) and + filepath = "" and + startline = 0 and + startcolumn = 0 and + endline = 0 and + endcolumn = 0 + } +} + +/** A declared entity (that is, type, constant, variable or function). */ +class DeclaredEntity extends Entity, @declobject { + /** Gets the expression to which this entity is initialized, if any. */ + Expr getInit() { + exists(ValueSpec spec, int i | + spec.getNameExpr(i) = getDeclaration() and + spec.getInit(i) = result + ) + } +} + +/** A built-in entity (that is, type, constant or function). */ +class BuiltinEntity extends Entity, @builtinobject { } + +/** An imported package. */ +class PackageEntity extends Entity, @pkgobject { } + +/** A built-in or declared named type. */ +class TypeEntity extends Entity, @typeobject { } + +/** A declared named type. */ +class DeclaredType extends TypeEntity, DeclaredEntity, @decltypeobject { + /** Gets the declaration specifier declaring this type. */ + TypeSpec getSpec() { result.getNameExpr() = this.getDeclaration() } +} + +/** A built-in named type. 
*/ +class BuiltinType extends TypeEntity, BuiltinEntity, @builtintypeobject { } + +/** A built-in or declared constant, variable, field, method or function. */ +class ValueEntity extends Entity, @valueobject { + /** Gets a data-flow node that reads the value of this entity. */ + Read getARead() { result.reads(this) } + + /** Gets a control-flow node that updates the value of this entity. */ + Write getAWrite() { result.writes(this, _) } +} + +/** A built-in or declared constant. */ +class Constant extends ValueEntity, @constobject { } + +/** A declared constant. */ +class DeclaredConstant extends Constant, DeclaredEntity, @declconstobject { + /** Gets the declaration specifier declaring this constant. */ + ValueSpec getSpec() { result.getANameExpr() = this.getDeclaration() } +} + +/** A built-in constant. */ +class BuiltinConstant extends Constant, BuiltinEntity, @builtinconstobject { } + +/** + * A built-in or declared variable. + * + * Note that Go currently does not have any built-in variables, so this class is effectively + * an alias for `DeclaredVariable`. + */ +class Variable extends ValueEntity, @varobject { } + +/** A declared variable. */ +class DeclaredVariable extends Variable, DeclaredEntity, @declvarobject { + /** Gets the declaration specifier declaring this variable. */ + ValueSpec getSpec() { result.getANameExpr() = this.getDeclaration() } +} + +/** A variable declared in a local scope (as opposed to a package scope or the universal scope). */ +class LocalVariable extends DeclaredVariable { + LocalVariable() { getScope() instanceof LocalScope } + + /** Gets the innermost function containing the scope of this variable, if any. */ + FuncDef getDeclaringFunction() { + result = getScope().(LocalScope).getEnclosingFunctionScope().getFunction() + } + + /** Holds if this variable is referenced inside a nested function. 
*/ + predicate isCaptured() { getDeclaringFunction() != getAReference().getEnclosingFunction() } +} + +/** + * A (named) function parameter. + * + * Note that receiver variables are considered parameters. + */ +class Parameter extends DeclaredVariable { + FuncDef f; + int index; + + Parameter() { + f.(MethodDecl).getReceiverDecl().getNameExpr() = this.getDeclaration() and + index = -1 + or + exists(FuncTypeExpr tp | tp = f.getTypeExpr() | + this = + rank[index + 1](DeclaredVariable parm, int j, int k | + parm.getDeclaration() = tp.getParameterDecl(j).getNameExpr(k) + | + parm order by j, k + ) + ) + } + + /** Gets the function to which this parameter belongs. */ + FuncDef getFunction() { result = f } + + /** + * Gets the index of this parameter among all parameters of the function. + * + * The receiver is considered to have index -1. + */ + int getIndex() { result = index } + + /** Holds if this is the `i`th parameter of function `fd`. */ + predicate isParameterOf(FuncDef fd, int i) { fd = f and i = index } +} + +/** The receiver variable of a method. */ +class ReceiverVariable extends Parameter { + override MethodDecl f; + + ReceiverVariable() { index = -1 } + + /** Holds if this is the receiver variable of method `m`. */ + predicate isReceiverOf(MethodDecl m) { m = f } +} + +/** A (named) function result variable. */ +class ResultVariable extends DeclaredVariable { + FuncDef f; + int index; + + ResultVariable() { + exists(FuncTypeExpr tp | tp = f.getTypeExpr() | + this = + rank[index + 1](DeclaredVariable parm, int j, int k | + parm.getDeclaration() = tp.getResultDecl(j).getNameExpr(k) + | + parm order by j, k + ) + ) + } + + /** Gets the function to which this result variable belongs. */ + FuncDef getFunction() { result = f } + + /** Gets the index of this result among all results of the function. */ + int getIndex() { result = index } + + /** Holds if this is the `i`th result of function `fd`. 
*/ + predicate isResultOf(FuncDef fd, int i) { fd = f and i = index } +} + +/** + * A struct field. + * + * Note that field identity is determined by type identity: if two struct types are identical in + * the sense of the Go language specification (https://golang.org/ref/spec#Type_identity), then + * any of their fields that have the same name are also identical. This, in turn, means that a + * field can have two or more declarations. + * + * For example, consider the following two type declarations: + * + * ```go + * type T1 struct { x int } + * type T2 struct { x int } + * ``` + * + * Types `T1` and `T2` are different, but their underlying struct types are identical. Hence + * the two declarations of `x` refer to the same field. + */ +class Field extends Variable { + StructType declaringType; + + Field() { fieldstructs(this, declaringType) } + + /** Gets the struct type declaring this field. */ + StructType getDeclaringType() { result = declaringType } + + override Package getPackage() { + exists(Type tp | tp.getUnderlyingType() = declaringType | result = tp.getPackage()) + } + + /** + * Holds if this field has name `f` and it belongs to a type with qualified name `tp`. + * + * Note that due to field embedding the same field may have multiple qualified names. + */ + override predicate hasQualifiedName(string tp, string f) { + exists(Type base | + tp = base.getQualifiedName() and + this = base.getField(f) + ) + } + + /** + * Holds if this field has name `f` and it belongs to a type `tp` declared in package `pkg`. + * + * Note that due to field embedding the same field may belong to multiple types. + */ + predicate hasQualifiedName(string pkg, string tp, string f) { + exists(Type base | + base.hasQualifiedName(pkg, tp) and + this = base.getField(f) + ) + } +} + +/** + * A field that belongs to a struct that may be embedded within another struct. + * + * When a selector addresses such a field, it is possible it is implicitly addressing a nested struct. 
+ */ +class PromotedField extends Field { + PromotedField() { this = any(StructType t).getFieldOfEmbedded(_, _, _, _) } +} + +/** A built-in or declared function. */ +class Function extends ValueEntity, @functionobject { + /** Gets a call to this function. */ + pragma[nomagic] + DataFlow::CallNode getACall() { + this = result.getTarget() + or + this.(DeclaredFunction).getFuncDecl() = result.getACallee() + } + + /** Gets the declaration of this function, if any. */ + FuncDecl getFuncDecl() { none() } + + /** Holds if this function may have observable side effects. Never holds by default. */ + predicate mayHaveSideEffects() { none() } + + /** + * Holds if this function may return without panicking, exiting the process, or looping forever. + * + * This predicate is an over-approximation: it may hold for functions that can never + * return normally, but it never fails to hold for functions that can. + * + * Note this is declared here and not in `DeclaredFunction` so that library models can override this + * by extending `Function` rather than having to remember to extend `DeclaredFunction`. + */ + predicate mayReturnNormally() { + not mustPanic() and + (ControlFlow::mayReturnNormally(getFuncDecl()) or not exists(getBody())) + } + + /** + * Holds if calling this function may cause a runtime panic. + * + * This predicate is an over-approximation: it may hold for functions that can never + * cause a runtime panic, but it never fails to hold for functions that can. + */ + predicate mayPanic() { any() } + + /** + * Holds if calling this function always causes a runtime panic. + * + * This predicate is an under-approximation: it may not hold for functions that do + * cause a runtime panic, but it never holds for functions that do not. + */ + predicate mustPanic() { none() } + + /** Gets the number of parameters of this function. */ + int getNumParameter() { result = getType().(SignatureType).getNumParameter() } + + /** Gets the type of the `i`th parameter of this function.
+ */ + Type getParameterType(int i) { result = getType().(SignatureType).getParameterType(i) } + + /** Gets the number of results of this function. */ + int getNumResult() { result = getType().(SignatureType).getNumResult() } + + /** Gets the type of the `i`th result of this function. */ + Type getResultType(int i) { result = getType().(SignatureType).getResultType(i) } + + /** Gets the body of this function, if any. */ + BlockStmt getBody() { result = getFuncDecl().getBody() } + + /** Gets the `i`th parameter of this function. */ + Parameter getParameter(int i) { result.isParameterOf(getFuncDecl(), i) } + + /** Gets a parameter of this function. */ + Parameter getAParameter() { result = getParameter(_) } + + /** Gets the `i`th result variable of this function. */ + ResultVariable getResult(int i) { result.isResultOf(getFuncDecl(), i) } + + /** Gets a result variable of this function. */ + ResultVariable getAResult() { result = getResult(_) } +} + +/** + * A method, that is, a function with a receiver variable, or a function declared in an interface. + * + * Note that method identity is determined by receiver type identity: if two methods have the same + * name and their receiver types are identical in the sense of the Go language specification + * (https://golang.org/ref/spec#Type_identity), then the two methods are identical as well. + */ +class Method extends Function { + Variable receiver; + + Method() { methodreceivers(this, receiver) } + + override Package getPackage() { + // a method doesn't have a scope, so manually associate it with its receiver's + // package. + result = this.getReceiverType().getPackage() + } + + /** Holds if this method is declared in an interface. */ + predicate isInterfaceMethod() { getReceiverType().getUnderlyingType() instanceof InterfaceType } + + /** Gets the receiver variable of this method. */ + Variable getReceiver() { result = receiver } + + /** Gets the type of the receiver variable of this method.
*/ + Type getReceiverType() { result = receiver.getType() } + + /** + * Gets the receiver base type of this method, that is, either the base type of the receiver type + * if it is a pointer type, or the receiver type itself if it is not a pointer type. + */ + Type getReceiverBaseType() { + exists(Type recv | recv = getReceiverType() | + if recv instanceof PointerType + then result = recv.(PointerType).getBaseType() + else result = recv + ) + } + + /** Holds if this method has name `m` and belongs to the method set of type `tp` or `*tp`. */ + private predicate isIn(NamedType tp, string m) { + this = tp.getMethod(m) or + this = tp.getPointerType().getMethod(m) + } + + /** + * Holds if this method has name `m` and belongs to the method set of a type `T` or `*T` where + * `T` has qualified name `tp`. + * + * Note that `meth.hasQualifiedName(tp, m)` is almost, but not quite, equivalent to + * `exists(Type t | tp = t.getQualifiedName() and meth = t.getMethod(m))`: the latter + * distinguishes between the method sets of `T` and `*T`, while the former does not. + */ + override predicate hasQualifiedName(string tp, string m) { + exists(NamedType t | + this.isIn(t, m) and + tp = t.getQualifiedName() + ) + } + + /** + * Holds if this method has name `m` and belongs to the method set of a type `T` or `*T` where + * `T` is declared in package `pkg` and has name `tp`. + * + * Note that `meth.hasQualifiedName(pkg, tp, m)` is almost, but not quite, equivalent to + * `exists(Type t | t.hasQualifiedName(pkg, tp) and meth = t.getMethod(m))`: the latter + * distinguishes between the method sets of `T` and `*T`, while the former does not. + */ + predicate hasQualifiedName(string pkg, string tp, string m) { + exists(NamedType t | + this.isIn(t, m) and + t.hasQualifiedName(pkg, tp) + ) + } + + /** + * Holds if this method implements the method `m`, that is, if `m` is a method + * on an interface, and this is a method with the same name on a type that + * implements that interface. 
+ * + * Note that all methods implement themselves, and interface methods _only_ + * implement themselves. + */ + predicate implements(Method m) { + this = m + or + not isInterfaceMethod() and + exists(Type t | + this = t.getMethod(m.getName()) and + t.implements(m.getReceiverType().getUnderlyingType()) + ) + } + + /** + * Holds if this method implements the method that has qualified name `pkg.tp.name`, that is, if + * `pkg.tp.name` is a method on an interface, and this is a method with the same name on a type + * that implements that interface. + */ + predicate implements(string pkg, string tp, string name) { + exists(Method m | m.hasQualifiedName(pkg, tp, name) | this.implements(m)) + } +} + +/** + * A method whose receiver may be embedded within a struct. + * + * When a selector addresses such a method, it is possible it is implicitly addressing a nested struct. + */ +class PromotedMethod extends Method { + PromotedMethod() { this = any(StructType t).getMethodOfEmbedded(_, _, _) } +} + +/** A declared function. */ +class DeclaredFunction extends Function, DeclaredEntity, @declfunctionobject { + override FuncDecl getFuncDecl() { result.getNameExpr() = this.getDeclaration() } + + override predicate mayHaveSideEffects() { + not exists(getBody()) + or + exists(BlockStmt body | body = getBody() | + body.mayHaveSideEffects() + or + // functions declared in files with build constraints may be defined differently + // for different platforms, so allow them to avoid false positives + body.getFile().hasBuildConstraints() + ) + } +} + +/** A built-in function. 
+ */ +class BuiltinFunction extends Function, BuiltinEntity, @builtinfunctionobject { + override predicate mayHaveSideEffects() { builtinFunction(getName(), false, _, _) } + + override predicate mayPanic() { builtinFunction(getName(), _, true, _) } + + override predicate mustPanic() { builtinFunction(getName(), _, _, true) } + + /** + * Holds if this function is pure, that is, it has no observable side effects and + * no non-determinism. + */ + predicate isPure() { not mayHaveSideEffects() } +} + +/** A statement label. */ +class Label extends Entity, @labelobject { } + +/** + * Holds if `name` is a built-in function, where + * + * - `isPure` is true if the function has no observable side effects, and false otherwise; + * - `mayPanic` is true if calling this function may cause a panic, and false otherwise; + * - `mustPanic` is true if calling this function always causes a panic, and false otherwise. + * + * Allocating memory is not considered an observable side effect. + */ +private predicate builtinFunction(string name, boolean isPure, boolean mayPanic, boolean mustPanic) { + name = "append" and isPure = false and mayPanic = false and mustPanic = false + or + name = "cap" and isPure = true and mayPanic = false and mustPanic = false + or + name = "close" and isPure = false and mayPanic = true and mustPanic = false + or + name = "complex" and isPure = true and mayPanic = true and mustPanic = false + or + name = "copy" and isPure = false and mayPanic = true and mustPanic = false + or + name = "delete" and isPure = false and mayPanic = false and mustPanic = false + or + name = "imag" and isPure = true and mayPanic = false and mustPanic = false + or + name = "len" and isPure = true and mayPanic = false and mustPanic = false + or + name = "make" and isPure = true and mayPanic = true and mustPanic = false + or + name = "new" and isPure = true and mayPanic = false and mustPanic = false + or + name = "panic" and isPure = false and mayPanic = true and mustPanic = true + or + 
name = "print" and isPure = false and mayPanic = false and mustPanic = false + or + name = "println" and isPure = false and mayPanic = false and mustPanic = false + or + name = "real" and isPure = true and mayPanic = false and mustPanic = false + or + name = "recover" and isPure = false and mayPanic = false and mustPanic = false +} + +/** Provides helper predicates for working with built-in objects from the universe scope. */ +module Builtin { + // built-in types + /** Gets the built-in type `bool`. */ + BuiltinType bool() { result.getName() = "bool" } + + /** Gets the built-in type `byte`. */ + BuiltinType byte() { result.getName() = "byte" } + + /** Gets the built-in type `complex64`. */ + BuiltinType complex64() { result.getName() = "complex64" } + + /** Gets the built-in type `complex128`. */ + BuiltinType complex128() { result.getName() = "complex128" } + + /** Gets the built-in type `error`. */ + BuiltinType error() { result.getName() = "error" } + + /** Gets the built-in type `float32`. */ + BuiltinType float32() { result.getName() = "float32" } + + /** Gets the built-in type `float64`. */ + BuiltinType float64() { result.getName() = "float64" } + + /** Gets the built-in type `int`. */ + BuiltinType int_() { result.getName() = "int" } + + /** Gets the built-in type `int8`. */ + BuiltinType int8() { result.getName() = "int8" } + + /** Gets the built-in type `int16`. */ + BuiltinType int16() { result.getName() = "int16" } + + /** Gets the built-in type `int32`. */ + BuiltinType int32() { result.getName() = "int32" } + + /** Gets the built-in type `int64`. */ + BuiltinType int64() { result.getName() = "int64" } + + /** Gets the built-in type `rune`. */ + BuiltinType rune() { result.getName() = "rune" } + + /** Gets the built-in type `string`. */ + BuiltinType string_() { result.getName() = "string" } + + /** Gets the built-in type `uint`. */ + BuiltinType uint() { result.getName() = "uint" } + + /** Gets the built-in type `uint8`. 
*/ + BuiltinType uint8() { result.getName() = "uint8" } + + /** Gets the built-in type `uint16`. */ + BuiltinType uint16() { result.getName() = "uint16" } + + /** Gets the built-in type `uint32`. */ + BuiltinType uint32() { result.getName() = "uint32" } + + /** Gets the built-in type `uint64`. */ + BuiltinType uint64() { result.getName() = "uint64" } + + /** Gets the built-in type `uintptr`. */ + BuiltinType uintptr() { result.getName() = "uintptr" } + + // built-in constants + /** Gets the built-in constant `true`. */ + BuiltinConstant true_() { result.getName() = "true" } + + /** Gets the built-in constant `false`. */ + BuiltinConstant false_() { result.getName() = "false" } + + /** Gets the built-in constant corresponding to `b`. */ + BuiltinConstant bool(boolean b) { + b = true and result = true_() + or + b = false and result = false_() + } + + /** Gets the built-in constant `iota`. */ + BuiltinConstant iota() { result.getName() = "iota" } + + // built-in zero value + /** Gets the built-in zero-value `nil`. */ + BuiltinConstant nil() { result.getName() = "nil" } + + /** Gets the built-in function `append`. */ + BuiltinFunction append() { result.getName() = "append" } + + /** Gets the built-in function `cap`. */ + BuiltinFunction cap() { result.getName() = "cap" } + + /** Gets the built-in function `close`. */ + BuiltinFunction close() { result.getName() = "close" } + + /** Gets the built-in function `complex`. */ + BuiltinFunction complex() { result.getName() = "complex" } + + /** Gets the built-in function `copy`. */ + BuiltinFunction copy() { result.getName() = "copy" } + + /** Gets the built-in function `delete`. */ + BuiltinFunction delete() { result.getName() = "delete" } + + /** Gets the built-in function `imag`. */ + BuiltinFunction imag() { result.getName() = "imag" } + + /** Gets the built-in function `len`. */ + BuiltinFunction len() { result.getName() = "len" } + + /** Gets the built-in function `make`. 
*/ + BuiltinFunction make() { result.getName() = "make" } + + /** Gets the built-in function `new`. */ + BuiltinFunction new() { result.getName() = "new" } + + /** Gets the built-in function `panic`. */ + BuiltinFunction panic() { result.getName() = "panic" } + + /** Gets the built-in function `print`. */ + BuiltinFunction print() { result.getName() = "print" } + + /** Gets the built-in function `println`. */ + BuiltinFunction println() { result.getName() = "println" } + + /** Gets the built-in function `real`. */ + BuiltinFunction real() { result.getName() = "real" } + + /** Gets the built-in function `recover`. */ + BuiltinFunction recover() { result.getName() = "recover" } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/Stmt.qll b/repo-tests/codeql-go/ql/lib/semmle/go/Stmt.qll new file mode 100644 index 00000000000..9873bf1db17 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/Stmt.qll @@ -0,0 +1,1135 @@ +/** + * Provides classes for working with statements. + */ + +import go + +/** + * A statement. + * + * Examples: + * + * ```go + * a = 0 + * + * if x := f(); x < y { + * return y - x + * } else { + * return x - y + * } + * ``` + */ +class Stmt extends @stmt, ExprParent, StmtParent { + /** + * Gets the kind of this statement, which is an integer value representing the statement's + * node type. + * + * Note that the mapping from node types to integer kinds is considered an implementation detail + * and subject to change without notice. + */ + int getKind() { stmts(this, result, _, _) } + + /** + * Holds if the execution of this statement may produce observable side effects. + * + * Memory allocation is not considered an observable side effect. + */ + predicate mayHaveSideEffects() { none() } + + /** Gets the first control-flow node in this statement. */ + ControlFlow::Node getFirstControlFlowNode() { result.isFirstNodeOf(this) } +} + +/** + * A bad statement, that is, a statement that could not be parsed. 
+ * + * Examples: + * + * ```go + * go fmt.Println + * defer int + * ``` + */ +class BadStmt extends @badstmt, Stmt { + override string toString() { result = "bad statement" } + + override string getAPrimaryQlClass() { result = "BadStmt" } +} + +/** + * A declaration statement. + * + * Examples: + * + * ```go + * var i int + * const pi = 3.14159 + * type Printer interface{ Print() } + * ``` + */ +class DeclStmt extends @declstmt, Stmt, DeclParent { + /** Gets the declaration in this statement. */ + Decl getDecl() { result = getDecl(0) } + + override predicate mayHaveSideEffects() { getDecl().mayHaveSideEffects() } + + override string toString() { result = "declaration statement" } + + override string getAPrimaryQlClass() { result = "DeclStmt" } +} + +/** + * An empty statement. + * + * Examples: + * + * ```go + * ; + * ``` + */ +class EmptyStmt extends @emptystmt, Stmt { + override string toString() { result = "empty statement" } + + override string getAPrimaryQlClass() { result = "EmptyStmt" } +} + +/** + * A labeled statement. + * + * Examples: + * + * ```go + * Error: log.Panic("error encountered") + * ``` + */ +class LabeledStmt extends @labeledstmt, Stmt { + /** Gets the identifier representing the label. */ + Ident getLabelExpr() { result = getChildExpr(0) } + + /** Gets the label. */ + string getLabel() { result = getLabelExpr().getName() } + + /** Gets the statement that is being labeled. */ + Stmt getStmt() { result = getChildStmt(1) } + + override predicate mayHaveSideEffects() { getStmt().mayHaveSideEffects() } + + override string toString() { result = "labeled statement" } + + override string getAPrimaryQlClass() { result = "LabeledStmt" } +} + +/** + * An expression statement. + * + * Examples: + * + * ```go + * h(x+y) + * f.Close() + * <-ch + * (<-ch) + * ``` + */ +class ExprStmt extends @exprstmt, Stmt { + /** Gets the expression. 
*/ + Expr getExpr() { result = getChildExpr(0) } + + override predicate mayHaveSideEffects() { getExpr().mayHaveSideEffects() } + + override string toString() { result = "expression statement" } + + override string getAPrimaryQlClass() { result = "ExprStmt" } +} + +/** + * A send statement. + * + * Examples: + * + * ```go + * ch <- 3 + * ``` + */ +class SendStmt extends @sendstmt, Stmt { + /** Gets the expression representing the channel. */ + Expr getChannel() { result = getChildExpr(0) } + + /** Gets the expression representing the value being sent. */ + Expr getValue() { result = getChildExpr(1) } + + override predicate mayHaveSideEffects() { any() } + + override string toString() { result = "send statement" } + + override string getAPrimaryQlClass() { result = "SendStmt" } +} + +/** + * An increment or decrement statement. + * + * Examples: + * + * ```go + * a++ + * b-- + * ``` + */ +class IncDecStmt extends @incdecstmt, Stmt { + /** Gets the expression being incremented or decremented. */ + Expr getOperand() { result = getChildExpr(0) } + + /** Gets the increment or decrement operator. */ + string getOperator() { none() } + + override predicate mayHaveSideEffects() { any() } +} + +/** + * An increment statement. + * + * Examples: + * + * ```go + * a++ + * ``` + */ +class IncStmt extends @incstmt, IncDecStmt { + override string getOperator() { result = "++" } + + override string toString() { result = "increment statement" } + + override string getAPrimaryQlClass() { result = "IncStmt" } +} + +/** + * A decrement statement. + * + * Examples: + * + * ```go + * b-- + * ``` + */ +class DecStmt extends @decstmt, IncDecStmt { + override string getOperator() { result = "--" } + + override string toString() { result = "decrement statement" } + + override string getAPrimaryQlClass() { result = "DecStmt" } +} + +/** + * A (simple or compound) assignment statement. 
+ * + * Examples: + * + * ```go + * x := 1 + * *p = f() + * a[i] = 23 + * (k) = <-ch // same as: k = <-ch + * a += 2 + * ``` + */ +class Assignment extends @assignment, Stmt { + /** Gets the `i`th left-hand side of this assignment (0-based). */ + Expr getLhs(int i) { + i >= 0 and + result = getChildExpr(-(i + 1)) + } + + /** Gets a left-hand side of this assignment. */ + Expr getAnLhs() { result = getLhs(_) } + + /** Gets the number of left-hand sides of this assignment. */ + int getNumLhs() { result = count(getAnLhs()) } + + /** Gets the unique left-hand side of this assignment, if there is only one. */ + Expr getLhs() { getNumLhs() = 1 and result = getLhs(0) } + + /** Gets the `i`th right-hand side of this assignment (0-based). */ + Expr getRhs(int i) { + i >= 0 and + result = getChildExpr(i + 1) + } + + /** Gets a right-hand side of this assignment. */ + Expr getAnRhs() { result = getRhs(_) } + + /** Gets the number of right-hand sides of this assignment. */ + int getNumRhs() { result = count(getAnRhs()) } + + /** Gets the unique right-hand side of this assignment, if there is only one. */ + Expr getRhs() { getNumRhs() = 1 and result = getRhs(0) } + + /** Holds if this assignment assigns `rhs` to `lhs`. */ + predicate assigns(Expr lhs, Expr rhs) { exists(int i | lhs = getLhs(i) and rhs = getRhs(i)) } + + /** Gets the assignment operator in this statement. */ + string getOperator() { none() } + + override predicate mayHaveSideEffects() { any() } + + override string toString() { result = "... " + getOperator() + " ..." } +} + +/** + * A simple assignment statement, that is, an assignment without a compound operator. + * + * Examples: + * + * ```go + * x := 1 + * *p = f() + * a[i] = 23 + * (k) = <-ch // same as: k = <-ch + * ``` + */ +class SimpleAssignStmt extends @simpleassignstmt, Assignment { + override string getAPrimaryQlClass() { result = "SimpleAssignStmt" } +} + +/** + * A plain assignment statement. 
+ * + * Examples: + * + * ```go + * *p = f() + * a[i] = 23 + * (k) = <-ch // same as: k = <-ch + * ``` + */ +class AssignStmt extends @assignstmt, SimpleAssignStmt { + override string getOperator() { result = "=" } + + override string getAPrimaryQlClass() { result = "AssignStmt" } +} + +/** + * A define statement. + * + * Examples: + * + * ```go + * x := 1 + * ``` + */ +class DefineStmt extends @definestmt, SimpleAssignStmt { + override string getOperator() { result = ":=" } + + override string getAPrimaryQlClass() { result = "DefineStmt" } +} + +/** + * A compound assignment statement. + * + * Examples: + * + * ```go + * a += 2 + * a /= 2 + * ``` + */ +class CompoundAssignStmt extends @compoundassignstmt, Assignment { } + +/** + * An add-assign statement using `+=`. + * + * Examples: + * + * ```go + * a += 2 + * ``` + */ +class AddAssignStmt extends @addassignstmt, CompoundAssignStmt { + override string getOperator() { result = "+=" } + + override string getAPrimaryQlClass() { result = "AddAssignStmt" } +} + +/** + * A subtract-assign statement using `-=`. + * + * Examples: + * + * ```go + * a -= 2 + * ``` + */ +class SubAssignStmt extends @subassignstmt, CompoundAssignStmt { + override string getOperator() { result = "-=" } + + override string getAPrimaryQlClass() { result = "SubAssignStmt" } +} + +/** + * A multiply-assign statement using `*=`. + * + * Examples: + * + * ```go + * a *= 2 + * ``` + */ +class MulAssignStmt extends @mulassignstmt, CompoundAssignStmt { + override string getOperator() { result = "*=" } + + override string getAPrimaryQlClass() { result = "MulAssignStmt" } +} + +/** + * A divide-assign statement using `/=`. + * + * Examples: + * + * ```go + * a /= 2 + * ``` + */ +class QuoAssignStmt extends @quoassignstmt, CompoundAssignStmt { + override string getOperator() { result = "/=" } + + override string getAPrimaryQlClass() { result = "QuoAssignStmt" } +} + +class DivAssignStmt = QuoAssignStmt; + +/** + * A modulo-assign statement using `%=`. 
+ * + * Examples: + * + * ```go + * a %= 2 + * ``` + */ +class RemAssignStmt extends @remassignstmt, CompoundAssignStmt { + override string getOperator() { result = "%=" } + + override string getAPrimaryQlClass() { result = "RemAssignStmt" } +} + +class ModAssignStmt = RemAssignStmt; + +/** + * An and-assign statement using `&=`. + * + * Examples: + * + * ```go + * a &= 2 + * ``` + */ +class AndAssignStmt extends @andassignstmt, CompoundAssignStmt { + override string getOperator() { result = "&=" } + + override string getAPrimaryQlClass() { result = "AndAssignStmt" } +} + +/** + * An or-assign statement using `|=`. + * + * Examples: + * + * ```go + * a |= 2 + * ``` + */ +class OrAssignStmt extends @orassignstmt, CompoundAssignStmt { + override string getOperator() { result = "|=" } + + override string getAPrimaryQlClass() { result = "OrAssignStmt" } +} + +/** + * An xor-assign statement using `^=`. + * + * Examples: + * + * ```go + * a ^= 2 + * ``` + */ +class XorAssignStmt extends @xorassignstmt, CompoundAssignStmt { + override string getOperator() { result = "^=" } + + override string getAPrimaryQlClass() { result = "XorAssignStmt" } +} + +/** + * A left-shift-assign statement using `<<=`. + * + * Examples: + * + * ```go + * a <<= 2 + * ``` + */ +class ShlAssignStmt extends @shlassignstmt, CompoundAssignStmt { + override string getOperator() { result = "<<=" } + + override string getAPrimaryQlClass() { result = "ShlAssignStmt" } +} + +class LShiftAssignStmt = ShlAssignStmt; + +/** + * A right-shift-assign statement using `>>=`. + * + * Examples: + * + * ```go + * a >>= 2 + * ``` + */ +class ShrAssignStmt extends @shrassignstmt, CompoundAssignStmt { + override string getOperator() { result = ">>=" } + + override string getAPrimaryQlClass() { result = "ShrAssignStmt" } +} + +class RShiftAssignStmt = ShrAssignStmt; + +/** + * An and-not-assign statement using `&^=`. 
+ * + * Examples: + * + * ```go + * a &^= 2 + * ``` + */ +class AndNotAssignStmt extends @andnotassignstmt, CompoundAssignStmt { + override string getOperator() { result = "&^=" } + + override string getAPrimaryQlClass() { result = "AndNotAssignStmt" } +} + +/** + * A `go` statement. + * + * Examples: + * + * ```go + * go fillPixels(row) + * ``` + */ +class GoStmt extends @gostmt, Stmt { + /** Gets the call. */ + CallExpr getCall() { result = getChildExpr(0) } + + override predicate mayHaveSideEffects() { getCall().mayHaveSideEffects() } + + override string toString() { result = "go statement" } + + override string getAPrimaryQlClass() { result = "GoStmt" } +} + +/** + * A `defer` statement. + * + * Examples: + * + * ```go + * defer mutex.Unlock() + * ``` + */ +class DeferStmt extends @deferstmt, Stmt { + /** Gets the call being deferred. */ + CallExpr getCall() { result = getChildExpr(0) } + + override predicate mayHaveSideEffects() { getCall().mayHaveSideEffects() } + + override string toString() { result = "defer statement" } + + override string getAPrimaryQlClass() { result = "DeferStmt" } +} + +/** + * A `return` statement. + * + * Examples: + * + * ```go + * return x + * ``` + */ +class ReturnStmt extends @returnstmt, Stmt { + /** Gets the `i`th returned expression (0-based). */ + Expr getExpr(int i) { result = getChildExpr(i) } + + /** Gets a returned expression. */ + Expr getAnExpr() { result = getExpr(_) } + + /** Gets the number of returned expressions. */ + int getNumExpr() { result = count(getAnExpr()) } + + /** Gets the unique returned expression, if there is only one. */ + Expr getExpr() { getNumChild() = 1 and result = getExpr(0) } + + override predicate mayHaveSideEffects() { getAnExpr().mayHaveSideEffects() } + + override string toString() { result = "return statement" } + + override string getAPrimaryQlClass() { result = "ReturnStmt" } +} + +/** + * A branch statement, for example a `break` or `goto`. 
+ * + * Examples: + * + * ```go + * break + * break OuterLoop + * continue + * continue RowLoop + * goto Error + * fallthrough + * ``` + */ +class BranchStmt extends @branchstmt, Stmt { + /** Gets the expression denoting the target label of the branch, if any. */ + Ident getLabelExpr() { result = getChildExpr(0) } + + /** Gets the target label of the branch, if any. */ + string getLabel() { result = getLabelExpr().getName() } +} + +/** + * A `break` statement. + * + * Examples: + * + * ```go + * break + * break OuterLoop + * ``` + */ +class BreakStmt extends @breakstmt, BranchStmt { + override string toString() { result = "break statement" } + + override string getAPrimaryQlClass() { result = "BreakStmt" } +} + +/** + * A `continue` statement. + * + * Examples: + * + * ```go + * continue + * continue RowLoop + * ``` + */ +class ContinueStmt extends @continuestmt, BranchStmt { + override string toString() { result = "continue statement" } + + override string getAPrimaryQlClass() { result = "ContinueStmt" } +} + +/** + * A `goto` statement. + * + * Examples: + * + * ```go + * goto Error + * ``` + */ +class GotoStmt extends @gotostmt, BranchStmt { + override string toString() { result = "goto statement" } + + override string getAPrimaryQlClass() { result = "GotoStmt" } +} + +/** + * A `fallthrough` statement. + * + * Examples: + * + * ```go + * fallthrough + * ``` + */ +class FallthroughStmt extends @fallthroughstmt, BranchStmt { + override string toString() { result = "fallthrough statement" } + + override string getAPrimaryQlClass() { result = "FallthroughStmt" } +} + +/** + * A block statement. + * + * Examples: + * + * ```go + * { + * fmt.Printf("iteration %d\n", i) + * f(i) + * } + * ``` + */ +class BlockStmt extends @blockstmt, Stmt, ScopeNode { + /** Gets the `i`th statement in this block (0-based). */ + Stmt getStmt(int i) { result = getChildStmt(i) } + + /** Gets a statement in this block. 
*/ + Stmt getAStmt() { result = getAChildStmt() } + + /** Gets the number of statements in this block. */ + int getNumStmt() { result = getNumChildStmt() } + + override predicate mayHaveSideEffects() { getAStmt().mayHaveSideEffects() } + + override string toString() { result = "block statement" } + + override string getAPrimaryQlClass() { result = "BlockStmt" } +} + +/** + * An `if` statement. + * + * Examples: + * + * ```go + * if x := f(); x < y { + * return y - x + * } else { + * return x - y + * } + * ``` + */ +class IfStmt extends @ifstmt, Stmt, ScopeNode { + /** Gets the init statement of this `if` statement, if any. */ + Stmt getInit() { result = getChildStmt(0) } + + /** Gets the condition of this `if` statement. */ + Expr getCond() { result = getChildExpr(1) } + + /** Gets the "then" branch of this `if` statement. */ + BlockStmt getThen() { result = getChildStmt(2) } + + /** Gets the "else" branch of this `if` statement, if any. */ + Stmt getElse() { result = getChildStmt(3) } + + override predicate mayHaveSideEffects() { + getInit().mayHaveSideEffects() or + getCond().mayHaveSideEffects() or + getThen().mayHaveSideEffects() or + getElse().mayHaveSideEffects() + } + + override string toString() { result = "if statement" } + + override string getAPrimaryQlClass() { result = "IfStmt" } +} + +/** + * A `case` or `default` clause in a `switch` statement. + * + * Examples: + * + * ```go + * case 0, 1: + * a = 1 + * fallthrough + * + * default: + * b = 2 + * + * case func(int) float64: + * printFunction(i) + * ``` + */ +class CaseClause extends @caseclause, Stmt, ScopeNode { + /** Gets the `i`th expression of this `case` clause (0-based). */ + Expr getExpr(int i) { result = getChildExpr(-(i + 1)) } + + /** Gets an expression of this `case` clause. */ + Expr getAnExpr() { result = getAChildExpr() } + + /** Gets the number of expressions of this `case` clause. 
*/ + int getNumExpr() { result = getNumChildExpr() } + + /** Gets the `i`th statement of this `case` clause (0-based). */ + Stmt getStmt(int i) { result = getChildStmt(i) } + + /** Gets a statement of this `case` clause. */ + Stmt getAStmt() { result = getAChildStmt() } + + /** Gets the number of statements of this `case` clause. */ + int getNumStmt() { result = getNumChildStmt() } + + override predicate mayHaveSideEffects() { + getAnExpr().mayHaveSideEffects() or + getAStmt().mayHaveSideEffects() + } + + override string toString() { result = "case clause" } + + override string getAPrimaryQlClass() { result = "CaseClause" } +} + +/** + * A `switch` statement, that is, either an expression switch or a type switch. + * + * Examples: + * + * ```go + * switch x := f(); x { + * case 0, 1: + * a = 1 + * fallthrough + * default: + * b = 2 + * } + * + * switch i := x.(type) { + * default: + * printString("don't know the type") + * case nil: + * printString("x is nil") + * case int: + * printInt(i) + * case func(int) float64: + * printFunction(i) + * } + * ``` + */ +class SwitchStmt extends @switchstmt, Stmt, ScopeNode { + /** Gets the init statement of this `switch` statement, if any. */ + Stmt getInit() { result = getChildStmt(0) } + + /** Gets the body of this `switch` statement. */ + BlockStmt getBody() { result = getChildStmt(2) } + + /** Gets the `i`th case clause of this `switch` statement (0-based). */ + CaseClause getCase(int i) { result = getBody().getStmt(i) } + + /** Gets a case clause of this `switch` statement. */ + CaseClause getACase() { result = getCase(_) } + + /** Gets the number of case clauses in this `switch` statement. */ + int getNumCase() { result = count(getACase()) } + + /** Gets the `i`th non-default case clause of this `switch` statement (0-based). 
*/ + CaseClause getNonDefaultCase(int i) { + result = + rank[i + 1](CaseClause cc, int j | cc = getCase(j) and exists(cc.getExpr(_)) | cc order by j) + } + + /** Gets a non-default case clause of this `switch` statement. */ + CaseClause getANonDefaultCase() { result = getNonDefaultCase(_) } + + /** Gets the number of non-default case clauses in this `switch` statement. */ + int getNumNonDefaultCase() { result = count(getANonDefaultCase()) } + + /** Gets the default case clause of this `switch` statement, if any. */ + CaseClause getDefault() { result = getACase() and not exists(result.getExpr(_)) } +} + +/** + * An expression-switch statement. + * + * Examples: + * + * ```go + * switch x := f(); x { + * case 0, 1: + * a = 1 + * fallthrough + * default: + * b = 2 + * } + * ``` + */ +class ExpressionSwitchStmt extends @exprswitchstmt, SwitchStmt { + /** Gets the switch expression of this `switch` statement. */ + Expr getExpr() { result = getChildExpr(1) } + + override predicate mayHaveSideEffects() { + getInit().mayHaveSideEffects() or + getBody().mayHaveSideEffects() + } + + override string toString() { result = "expression-switch statement" } + + override string getAPrimaryQlClass() { result = "ExpressionSwitchStmt" } +} + +/** + * A type-switch statement. + * + * Examples: + * + * ```go + * switch i := x.(type) { + * default: + * printString("don't know the type") // type of i is type of x (interface{}) + * case nil: + * printString("x is nil") // type of i is type of x (interface{}) + * case int: + * printInt(i) // type of i is int + * case func(int) float64: + * printFunction(i) // type of i is func(int) float64 + * } + * ``` + */ +class TypeSwitchStmt extends @typeswitchstmt, SwitchStmt { + /** Gets the assign statement of this type-switch statement. */ + SimpleAssignStmt getAssign() { result = getChildStmt(1) } + + /** Gets the test statement of this type-switch statement. This is a `SimpleAssignStmt` or `ExprStmt`. 
*/ + Stmt getTest() { result = getChildStmt(1) } + + /** Gets the expression whose type is examined by this `switch` statement. */ + Expr getExpr() { result = getAssign().getRhs() or result = getChildStmt(1).(ExprStmt).getExpr() } + + override predicate mayHaveSideEffects() { any() } + + override string toString() { result = "type-switch statement" } + + override string getAPrimaryQlClass() { result = "TypeSwitchStmt" } +} + +/** + * A comm clause, that is, a `case` or `default` clause in a `select` statement. + * + * Examples: + * + * ```go + * case i1 = <-c1: + * print("received ", i1, " from c1\n") + * + * case c2 <- i2: + * print("sent ", i2, " to c2\n") + * + * case i3, ok := (<-c3): // same as: i3, ok := <-c3 + * if ok { + * print("received ", i3, " from c3\n") + * } else { + * print("c3 is closed\n") + * } + * + * default: + * print("no communication\n") + * ``` + */ +class CommClause extends @commclause, Stmt, ScopeNode { + /** Gets the comm statement of this clause, if any. */ + Stmt getComm() { result = getChildStmt(0) } + + /** Gets the `i`th statement of this clause (0-based). */ + Stmt getStmt(int i) { i >= 0 and result = getChildStmt(i + 1) } + + /** Gets a statement of this clause. */ + Stmt getAStmt() { result = getStmt(_) } + + /** Gets the number of statements of this clause. */ + int getNumStmt() { result = count(getAStmt()) } + + override predicate mayHaveSideEffects() { getAStmt().mayHaveSideEffects() } + + override string toString() { result = "comm clause" } + + override string getAPrimaryQlClass() { result = "CommClause" } +} + +/** + * A receive statement in a comm clause. + * + * Examples: + * + * ```go + * i1 = <-c1 + * i3, ok := <-c3 + * i3, ok := (<-c3) + * ``` + */ +class RecvStmt extends Stmt { + RecvStmt() { this = any(CommClause cc).getComm() and not this instanceof SendStmt } + + /** Gets the `i`th left-hand-side expression of this receive statement, if any. 
*/ + Expr getLhs(int i) { result = this.(Assignment).getLhs(i) } + + /** Gets the number of left-hand-side expressions of this receive statement. */ + int getNumLhs() { result = count(getLhs(_)) } + + /** Gets the receive expression of this receive statement. */ + RecvExpr getExpr() { + result = this.(ExprStmt).getExpr() or + result = this.(Assignment).getRhs() + } + + override string getAPrimaryQlClass() { result = "RecvStmt" } +} + +/** + * A `select` statement. + * + * Examples: + * + * ```go + * select { + * case i1 = <-c1: + * print("received ", i1, " from c1\n") + * case c2 <- i2: + * print("sent ", i2, " to c2\n") + * case i3, ok := (<-c3): // same as: i3, ok := <-c3 + * if ok { + * print("received ", i3, " from c3\n") + * } else { + * print("c3 is closed\n") + * } + * default: + * print("no communication\n") + * } + * ``` + */ +class SelectStmt extends @selectstmt, Stmt { + /** Gets the body of this `select` statement. */ + BlockStmt getBody() { result = getChildStmt(0) } + + /** + * Gets the `i`th comm clause (that is, `case` or `default` clause) in this `select` statement. + */ + CommClause getCommClause(int i) { result = getBody().getStmt(i) } + + /** + * Gets a comm clause in this `select` statement. + */ + CommClause getACommClause() { result = getCommClause(_) } + + /** Gets the `i`th `case` clause in this `select` statement. */ + CommClause getNonDefaultCommClause(int i) { + result = + rank[i + 1](CommClause cc, int j | + cc = getCommClause(j) and exists(cc.getComm()) + | + cc order by j + ) + } + + /** Gets the number of `case` clauses in this `select` statement. */ + int getNumNonDefaultCommClause() { result = count(getNonDefaultCommClause(_)) } + + /** Gets the `default` clause in this `select` statement, if any. 
*/ + CommClause getDefaultCommClause() { + result = getCommClause(_) and + not exists(result.getComm()) + } + + override predicate mayHaveSideEffects() { any() } + + override string toString() { result = "select statement" } + + override string getAPrimaryQlClass() { result = "SelectStmt" } +} + +/** + * A loop, that is, either a `for` statement or a `range` statement. + * + * Examples: + * + * ```go + * for a < b { + * a *= 2 + * } + * + * for i := 0; i < 10; i++ { + * f(i) + * } + * + * for key, value := range mymap { + * fmt.Printf("mymap[%s] = %d\n", key, value) + * } + * ``` + */ +class LoopStmt extends @loopstmt, Stmt, ScopeNode { + /** Gets the body of this loop. */ + BlockStmt getBody() { none() } +} + +/** + * A `for` statement. + * + * Examples: + * + * ```go + * for a < b { + * a *= 2 + * } + * + * for i := 0; i < 10; i++ { + * f(i) + * } + * ``` + */ +class ForStmt extends @forstmt, LoopStmt { + /** Gets the init statement of this `for` statement, if any. */ + Stmt getInit() { result = getChildStmt(0) } + + /** Gets the condition of this `for` statement. */ + Expr getCond() { result = getChildExpr(1) } + + /** Gets the post statement of this `for` statement. */ + Stmt getPost() { result = getChildStmt(2) } + + override BlockStmt getBody() { result = getChildStmt(3) } + + override predicate mayHaveSideEffects() { + getInit().mayHaveSideEffects() or + getCond().mayHaveSideEffects() or + getPost().mayHaveSideEffects() or + getBody().mayHaveSideEffects() + } + + override string toString() { result = "for statement" } + + override string getAPrimaryQlClass() { result = "ForStmt" } +} + +/** + * A `range` statement. 
+ * + * Examples: + * + * ```go + * for key, value := range mymap { + * fmt.Printf("mymap[%s] = %d\n", key, value) + * } + * + * for _, value = range array { + * fmt.Printf("array contains: %d\n", value) + * } + * + * for index, _ := range str { + * fmt.Printf("str[%d] = ?\n", index) + * } + * + * for value = range ch { + * fmt.Printf("value from channel: %d\n", value) + * } + * ``` + */ +class RangeStmt extends @rangestmt, LoopStmt { + /** Gets the expression denoting the key of this `range` statement. */ + Expr getKey() { result = getChildExpr(0) } + + /** Gets the expression denoting the value of this `range` statement. */ + Expr getValue() { result = getChildExpr(1) } + + /** Gets the domain of this `range` statement. */ + Expr getDomain() { result = getChildExpr(2) } + + override BlockStmt getBody() { result = getChildStmt(3) } + + override predicate mayHaveSideEffects() { any() } + + override string toString() { result = "range statement" } + + override string getAPrimaryQlClass() { result = "RangeStmt" } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/StringOps.qll b/repo-tests/codeql-go/ql/lib/semmle/go/StringOps.qll new file mode 100644 index 00000000000..296263b1d44 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/StringOps.qll @@ -0,0 +1,494 @@ +/** + * Provides predicates and classes for working with string operations. + */ + +import go + +/** Provides predicates and classes for working with string operations. */ +module StringOps { + /** + * An expression that is equivalent to `strings.HasPrefix(A, B)` or `!strings.HasPrefix(A, B)`. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `StringOps::HasPrefix::Range` instead. + */ + class HasPrefix extends DataFlow::Node { + HasPrefix::Range range; + + HasPrefix() { range = this } + + /** + * Gets the `A` in `strings.HasPrefix(A, B)`. 
+ */ + DataFlow::Node getBaseString() { result = range.getBaseString() } + + /** + * Gets the `B` in `strings.HasPrefix(A, B)`. + */ + DataFlow::Node getSubstring() { result = range.getSubstring() } + + /** + * Gets the polarity of the check. + * + * If the polarity is `false` the check returns `true` if the string does not start + * with the given substring. + */ + boolean getPolarity() { result = range.getPolarity() } + } + + class StartsWith = HasPrefix; + + /** Provides predicates and classes for working with prefix checks. */ + module HasPrefix { + /** + * An expression that is equivalent to `strings.HasPrefix(A, B)` or `!strings.HasPrefix(A, B)`. + * + * Extend this class to model new APIs. If you want to refine existing API models, extend + * `StringOps::HasPrefix` instead. + */ + abstract class Range extends DataFlow::Node { + /** + * Gets the `A` in `strings.HasPrefix(A, B)`. + */ + abstract DataFlow::Node getBaseString(); + + /** + * Gets the `B` in `strings.HasPrefix(A, B)`. + */ + abstract DataFlow::Node getSubstring(); + + /** + * Gets the polarity of the check. + * + * If the polarity is `false` the check returns `true` if the string does not start + * with the given substring. + */ + boolean getPolarity() { result = true } + } + + /** + * An expression of the form `strings.HasPrefix(A, B)`. + */ + private class StringsHasPrefix extends Range, DataFlow::CallNode { + StringsHasPrefix() { getTarget().hasQualifiedName("strings", "HasPrefix") } + + override DataFlow::Node getBaseString() { result = getArgument(0) } + + override DataFlow::Node getSubstring() { result = getArgument(1) } + } + + /** + * Holds if `eq` is of the form `nd == 0` or `nd != 0`. + */ + pragma[noinline] + private predicate comparesToZero(DataFlow::EqualityTestNode eq, DataFlow::Node nd) { + exists(DataFlow::Node zero | + eq.hasOperands(globalValueNumber(nd).getANode(), zero) and + zero.getIntValue() = 0 + ) + } + + /** + * An expression of the form `strings.Index(A, B) == 0`. 
+ */ + private class HasPrefix_IndexOfEquals extends Range, DataFlow::EqualityTestNode { + DataFlow::CallNode indexOf; + + HasPrefix_IndexOfEquals() { + comparesToZero(this, indexOf) and + indexOf.getTarget().hasQualifiedName("strings", "Index") + } + + override DataFlow::Node getBaseString() { result = indexOf.getArgument(0) } + + override DataFlow::Node getSubstring() { result = indexOf.getArgument(1) } + + override boolean getPolarity() { result = expr.getPolarity() } + } + + /** + * Holds if `eq` is of the form `str[0] == rhs` or `str[0] != rhs`. + */ + pragma[noinline] + private predicate comparesFirstCharacter( + DataFlow::EqualityTestNode eq, DataFlow::Node str, DataFlow::Node rhs + ) { + exists(DataFlow::ElementReadNode read | + eq.hasOperands(globalValueNumber(read).getANode(), rhs) and + str = read.getBase() and + str.getType().getUnderlyingType() instanceof StringType and + read.getIndex().getIntValue() = 0 + ) + } + + /** + * A comparison of the form `x[0] == 'k'` for some rune literal `k`. + */ + private class HasPrefix_FirstCharacter extends Range, DataFlow::EqualityTestNode { + DataFlow::Node base; + DataFlow::Node runeLiteral; + + HasPrefix_FirstCharacter() { comparesFirstCharacter(this, base, runeLiteral) } + + override DataFlow::Node getBaseString() { result = base } + + override DataFlow::Node getSubstring() { result = runeLiteral } + + override boolean getPolarity() { result = expr.getPolarity() } + } + + /** + * A comparison of the form `x[:len(y)] == y`. 
+ */ + private class HasPrefix_Substring extends Range, DataFlow::EqualityTestNode { + DataFlow::SliceNode slice; + DataFlow::Node substring; + + HasPrefix_Substring() { + eq(_, slice, substring) and + slice.getLow().getIntValue() = 0 and + ( + exists(DataFlow::CallNode len | + len = Builtin::len().getACall() and + len.getArgument(0) = globalValueNumber(substring).getANode() and + slice.getHigh() = globalValueNumber(len).getANode() + ) + or + substring.getStringValue().length() = slice.getHigh().getIntValue() + ) + } + + override DataFlow::Node getBaseString() { result = slice.getBase() } + + override DataFlow::Node getSubstring() { result = substring } + + override boolean getPolarity() { result = expr.getPolarity() } + } + } + + /** + * A data-flow node that performs string concatenation. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `StringOps::Concatenation::Range` instead. + */ + class Concatenation extends DataFlow::Node { + Concatenation::Range self; + + Concatenation() { this = self } + + /** + * Gets the `n`th operand of this string concatenation, if there is a data-flow node for it. + */ + DataFlow::Node getOperand(int n) { result = self.getOperand(n) } + + /** + * Gets the string value of the `n`th operand of this string concatenation, if it is a constant. + */ + string getOperandStringValue(int n) { result = self.getOperandStringValue(n) } + + /** + * Gets the number of operands of this string concatenation. + */ + int getNumOperand() { result = self.getNumOperand() } + } + + /** Provides predicates and classes for working with string concatenations. */ + module Concatenation { + /** + * A data-flow node that performs string concatenation. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `StringOps::Concatenation` instead. 
+ */ + abstract class Range extends DataFlow::Node { + /** + * Gets the `n`th operand of this string concatenation, if there is a data-flow node for it. + */ + abstract DataFlow::Node getOperand(int n); + + /** + * Gets the string value of the `n`th operand of this string concatenation, if it is + * a constant. + */ + string getOperandStringValue(int n) { result = getOperand(n).getStringValue() } + + /** + * Gets the number of operands of this string concatenation. + */ + int getNumOperand() { result = count(getOperand(_)) } + } + + /** A string concatenation using the `+` or `+=` operator. */ + private class PlusConcat extends Range, DataFlow::BinaryOperationNode { + PlusConcat() { + getType() instanceof StringType and + getOperator() = "+" + } + + override DataFlow::Node getOperand(int n) { + n = 0 and result = getLeftOperand() + or + n = 1 and result = getRightOperand() + } + } + + /** + * Gets a regular expression for matching simple format-string components, including flags, + * width and precision specifiers, but not including `*` specifiers or explicit argument + * indices. + */ + pragma[noinline] + private string getFormatComponentRegex() { + exists( + string literal, string opt_flag, string width, string prec, string opt_width_and_prec, + string operator, string verb + | + literal = "([^%]|%%)+" and + opt_flag = "[-+ #0]?" and + width = "\\d+|\\*" and + prec = "\\.(\\d+|\\*)" and + opt_width_and_prec = "(" + width + ")?(" + prec + ")?" and + operator = "[bcdeEfFgGoOpqstTxXUv]" and + verb = "(%" + opt_flag + opt_width_and_prec + operator + ")" + | + result = "(" + literal + "|" + verb + ")" + ) + } + + /** + * A call to `fmt.Sprintf`, considered as a string concatenation. + * + * Only calls with simple format strings (no `*` specifiers, no explicit argument indices) + * are supported. Such format strings can be viewed as sequences of alternating literal and + * non-literal components. 
A literal component contains no `%` characters except `%%` pairs, + * while a non-literal component consists of `%`, a verb, and possibly flags and specifiers. + * Each non-literal component consumes exactly one argument, in order of appearance. + * + * Literal components give rise to concatenation operands that have a string value but no + * data-flow node; non-literal `%s` or `%v` components give rise to concatenation operands + * that do have an associated data-flow node but possibly no string value; any other non-literal + * components give rise to concatenation operands that have neither an associated data-flow + * node nor a string value. This is because verbs like `%q` perform additional string + * transformations that we cannot easily represent. + */ + private class SprintfConcat extends Range, DataFlow::CallNode { + string fmt; + + SprintfConcat() { + exists(Function sprintf | sprintf.hasQualifiedName("fmt", "Sprintf") | + this = sprintf.getACall() and + fmt = getArgument(0).getStringValue() and + fmt.regexpMatch(getFormatComponentRegex() + "*") + ) + } + + /** + * Gets the `n`th component of this format string. + */ + private string getComponent(int n) { + result = fmt.regexpFind(getFormatComponentRegex(), n, _) + } + + override DataFlow::Node getOperand(int n) { + exists(int i, string part | part = "%s" or part = "%v" | + part = getComponent(n) and + i = count(int j | j < n and getComponent(j).regexpMatch("%[^%].*")) and + result = getArgument(i + 1) + ) + } + + override string getOperandStringValue(int n) { + result = Range.super.getOperandStringValue(n) + or + exists(string cmp | cmp = getComponent(n) | + (cmp.charAt(0) != "%" or cmp.charAt(1) = "%") and + result = cmp.replaceAll("%%", "%") + ) + } + + override int getNumOperand() { result = max(int i | exists(getComponent(i))) + 1 } + } + + /** + * Holds if `src` flows to `dst` through the `n`th operand of the given concatenation operator.
+ */ + predicate taintStep(DataFlow::Node src, DataFlow::Node dst, Concatenation cat, int n) { + src = cat.getOperand(n) and + dst = cat + } + + /** + * Holds if there is a taint step from `src` to `dst` through string concatenation. + */ + predicate taintStep(DataFlow::Node src, DataFlow::Node dst) { taintStep(src, dst, _, _) } + } + + private newtype TConcatenationElement = + /** A root concatenation element that is not itself an operand of a string concatenation. */ + MkConcatenationRoot(Concatenation cat) { not cat = any(Concatenation parent).getOperand(_) } or + /** A concatenation element that is an operand of a string concatenation. */ + MkConcatenationOperand(Concatenation parent, int i) { i in [0 .. parent.getNumOperand() - 1] } + + /** + * An element of a string concatenation, which either itself performs a string concatenation or + * occurs as an operand in a string concatenation. + * + * For example, the expression `x + y + z` contains the following concatenation + * elements: + * + * - The leaf elements `x`, `y`, and `z` + * - The intermediate element `x + y`, which is both a concatenation and an operand + * - The root element `x + y + z` + */ + class ConcatenationElement extends TConcatenationElement { + /** + * Gets the data-flow node corresponding to this concatenation element, if any. + */ + DataFlow::Node asNode() { + this = MkConcatenationRoot(result) + or + exists(Concatenation parent, int i | this = MkConcatenationOperand(parent, i) | + result = parent.getOperand(i) + ) + } + + /** + * Gets the string value of this concatenation element if it is a constant. + */ + string getStringValue() { + result = asNode().getStringValue() + or + exists(Concatenation parent, int i | this = MkConcatenationOperand(parent, i) | + result = parent.getOperandStringValue(i) + ) + } + + /** + * Gets the `n`th operand of this string concatenation. 
+ */ + ConcatenationOperand getOperand(int n) { result = MkConcatenationOperand(asNode(), n) } + + /** + * Gets an operand of this string concatenation. + */ + ConcatenationOperand getAnOperand() { result = this.getOperand(_) } + + /** + * Gets the number of operands of this string concatenation. + */ + int getNumOperand() { result = count(this.getAnOperand()) } + + /** + * Gets the first operand of this string concatenation. + * + * For example, the first operand of `(x + y) + z` is `(x + y)`. + */ + ConcatenationOperand getFirstOperand() { result = getOperand(0) } + + /** + * Gets the last operand of this string concatenation. + * + * For example, the last operand of `x + (y + z)` is `(y + z)`. + */ + ConcatenationOperand getLastOperand() { result = getOperand(getNumOperand() - 1) } + + /** + * Gets the root of the concatenation tree to which this element belongs. + */ + ConcatenationRoot getConcatenationRoot() { this = result.getAnOperand*() } + + /** + * Gets a leaf in the concatenation tree that this element is the root of. + */ + ConcatenationLeaf getALeaf() { result = this.getAnOperand*() } + + /** + * Gets the first leaf in this concatenation tree. + * + * For example, the first leaf of `(x + y) + z` is `x`. + */ + ConcatenationLeaf getFirstLeaf() { result = getFirstOperand*() } + + /** + * Gets the last leaf in this concatenation tree. + * + * For example, the last leaf of `x + (y + z)` is `z`. + */ + ConcatenationLeaf getLastLeaf() { result = getLastOperand*() } + + /** Gets a textual representation of this concatenation element. */ + string toString() { + if exists(asNode()) + then result = asNode().toString() + else + if exists(getStringValue()) + then result = getStringValue() + else result = "concatenation element" + } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. 
+ * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + asNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + or + // use dummy location for elements that don't have a corresponding node + not exists(asNode()) and + filepath = "" and + startline = 0 and + startcolumn = 0 and + endline = 0 and + endcolumn = 0 + } + } + + /** + * One of the operands in a string concatenation. + * + * See `ConcatenationElement` for more information. + */ + class ConcatenationOperand extends ConcatenationElement, MkConcatenationOperand { } + + /** + * A data-flow node that performs a string concatenation, and is not an + * immediate operand in a larger string concatenation. + * + * See `ConcatenationElement` for more information. + */ + class ConcatenationRoot extends ConcatenationElement, MkConcatenationRoot { } + + /** + * An operand to a concatenation that is not itself a concatenation. + * + * See `ConcatenationElement` for more information. + */ + class ConcatenationLeaf extends ConcatenationOperand { + ConcatenationLeaf() { not exists(getAnOperand()) } + + /** + * Gets the operand immediately preceding this one in its parent concatenation. + * + * For example, in `(x + y) + z`, the previous leaf for `z` is `y`. + */ + ConcatenationLeaf getPreviousLeaf() { + exists(ConcatenationElement parent, int i | + result = parent.getOperand(i - 1).getLastLeaf() and + this = parent.getOperand(i).getFirstLeaf() + ) + } + + /** + * Gets the operand immediately succeeding this one in its parent concatenation. + * + * For example, in `(x + y) + z`, the next leaf for `y` is `z`.
+ */ + ConcatenationLeaf getNextLeaf() { this = result.getPreviousLeaf() } + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/Types.qll b/repo-tests/codeql-go/ql/lib/semmle/go/Types.qll new file mode 100644 index 00000000000..b6a9cab810b --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/Types.qll @@ -0,0 +1,695 @@ +/** + * Provides classes for working with Go types. + */ + +import go + +/** A Go type. */ +class Type extends @type { + /** Gets the name of this type, if it has one. */ + string getName() { typename(this, result) } + + /** + * Gets the underlying type of this type after any type aliases have been replaced + * with their definition. + */ + Type getUnderlyingType() { result = this } + + /** + * Gets the entity associated with this type. + */ + TypeEntity getEntity() { type_objects(this, result) } + + /** Gets the package in which this type is declared, if any. */ + Package getPackage() { result = this.getEntity().getPackage() } + + /** + * Gets the qualified name of this type, if any. + * + * Only (defined) named types like `io.Writer` have a qualified name. Basic types like `int`, + * pointer types like `*io.Writer`, and other composite types do not have a qualified name. + */ + string getQualifiedName() { result = getEntity().getQualifiedName() } + + /** + * Holds if this type is declared in a package with path `pkg` and has name `name`. + * + * Only (defined) named types like `io.Writer` have a qualified name. Basic types like `int`, + * pointer types like `*io.Writer`, and other composite types do not have a qualified name. + */ + predicate hasQualifiedName(string pkg, string name) { getEntity().hasQualifiedName(pkg, name) } + + /** + * Holds if the method set of this type contains a method named `m` of type `t`. + */ + predicate hasMethod(string m, SignatureType t) { t = getMethod(m).getType() } + + /** + * Gets the method `m` belonging to the method set of this type, if any. 
+ * + * Note that this predicate never has a result for struct types. Methods are associated + * with the corresponding named type instead. + */ + Method getMethod(string m) { + result.getReceiverType() = this and + result.getName() = m + } + + /** + * Gets the field `f` of this type. + * + * This includes fields promoted from an embedded field. + */ + Field getField(string f) { result = getUnderlyingType().getField(f) } + + /** + * Holds if this type implements interface `i`, that is, the method set of `i` + * is contained in the method set of this type. + */ + predicate implements(InterfaceType i) { + isEmptyInterface(i) + or + this.hasMethod(getExampleMethodName(i), _) and + forall(string m, SignatureType t | i.hasMethod(m, t) | this.hasMethod(m, t)) + } + + /** + * Holds if this type implements an interface that has the qualified name `pkg.name`, + * that is, the method set of `pkg.name` is contained in the method set of this type. + */ + predicate implements(string pkg, string name) { + exists(Type t | t.hasQualifiedName(pkg, name) | this.implements(t.getUnderlyingType())) + } + + /** + * Gets the pointer type that has this type as its base type. + */ + PointerType getPointerType() { result.getBaseType() = this } + + /** + * Gets a pretty-printed representation of this type, including its structure where applicable. + */ + string pp() { result = toString() } + + /** + * Gets a basic textual representation of this type. + */ + string toString() { result = getName() } +} + +/** An invalid type. */ +class InvalidType extends @invalidtype, Type { + override string toString() { result = "invalid type" } +} + +/** A basic type. 
*/ +class BasicType extends @basictype, Type { } + +/** Either the normal or literal boolean type */ +class BoolType extends @booltype, BasicType { } + +/** The `bool` type of a non-literal expression */ +class BoolExprType extends @boolexprtype, BoolType { + override string getName() { result = "bool" } +} + +/** A numeric type such as `int` or `float64`. */ +class NumericType extends @numerictype, BasicType { + /** + * Gets the implementation-independent size (in bits) of this numeric type. + * + * This predicate is not defined for types with an implementation-specific size, that is, + * `uint`, `int` or `uintptr`. + */ + int getSize() { none() } + + /** + * Gets a possible implementation-specific size (in bits) of this numeric type. + * + * This predicate is not defined for `uintptr` since the language specification says nothing + * about its size. + */ + int getASize() { result = getSize() } +} + +/** An integer type such as `int` or `uint64`. */ +class IntegerType extends @integertype, NumericType { } + +/** A signed integer type such as `int`. */ +class SignedIntegerType extends @signedintegertype, IntegerType { } + +/** The type `int`. */ +class IntType extends @inttype, SignedIntegerType { + override int getASize() { result = 32 or result = 64 } + + override string getName() { result = "int" } +} + +/** The type `int8`. */ +class Int8Type extends @int8type, SignedIntegerType { + override int getSize() { result = 8 } + + override string getName() { result = "int8" } +} + +/** The type `int16`. */ +class Int16Type extends @int16type, SignedIntegerType { + override int getSize() { result = 16 } + + override string getName() { result = "int16" } +} + +/** The type `int32`. */ +class Int32Type extends @int32type, SignedIntegerType { + override int getSize() { result = 32 } + + override string getName() { result = "int32" } +} + +/** The type `int64`. 
*/ +class Int64Type extends @int64type, SignedIntegerType { + override int getSize() { result = 64 } + + override string getName() { result = "int64" } +} + +/** An unsigned integer type such as `uint`. */ +class UnsignedIntegerType extends @unsignedintegertype, IntegerType { } + +/** The type `uint`. */ +class UintType extends @uinttype, UnsignedIntegerType { + override int getASize() { result = 32 or result = 64 } + + override string getName() { result = "uint" } +} + +/** The type `uint8`. */ +class Uint8Type extends @uint8type, UnsignedIntegerType { + override int getSize() { result = 8 } + + override string getName() { result = "uint8" } +} + +/** The type `uint16`. */ +class Uint16Type extends @uint16type, UnsignedIntegerType { + override int getSize() { result = 16 } + + override string getName() { result = "uint16" } +} + +/** The type `uint32`. */ +class Uint32Type extends @uint32type, UnsignedIntegerType { + override int getSize() { result = 32 } + + override string getName() { result = "uint32" } +} + +/** The type `uint64`. */ +class Uint64Type extends @uint64type, UnsignedIntegerType { + override int getSize() { result = 64 } + + override string getName() { result = "uint64" } +} + +/** The type `uintptr`. */ +class UintptrType extends @uintptrtype, BasicType { + override string getName() { result = "uintptr" } +} + +/** A floating-point type such as `float64`. */ +class FloatType extends @floattype, NumericType { } + +/** The type `float32`. */ +class Float32Type extends @float32type, FloatType { + override int getSize() { result = 32 } + + override string getName() { result = "float32" } +} + +/** The type `float64`. */ +class Float64Type extends @float64type, FloatType { + override int getSize() { result = 64 } + + override string getName() { result = "float64" } +} + +/** A complex-number type such as `complex64`. */ +class ComplexType extends @complextype, NumericType { } + +/** The type `complex64`. 
*/ +class Complex64Type extends @complex64type, ComplexType { + override int getSize() { result = 64 } + + override string getName() { result = "complex64" } +} + +/** The type `complex128`. */ +class Complex128Type extends @complex128type, ComplexType { + override int getSize() { result = 128 } + + override string getName() { result = "complex128" } +} + +/** Either the normal or literal string type */ +class StringType extends @stringtype, BasicType { } + +/** The `string` type of a non-literal expression */ +class StringExprType extends @stringexprtype, StringType { + override string getName() { result = "string" } +} + +/** The type `unsafe.Pointer`. */ +class UnsafePointerType extends @unsafepointertype, BasicType { + override string getName() { result = "unsafe.Pointer" } +} + +/** The type of a literal. */ +class LiteralType extends @literaltype, BasicType { } + +/** The type of a bool literal. */ +class BoolLiteralType extends @boolliteraltype, LiteralType, BoolType { + override string toString() { result = "bool literal" } +} + +/** The type of an integer literal. */ +class IntLiteralType extends @intliteraltype, LiteralType, SignedIntegerType { + override string toString() { result = "int literal" } +} + +/** The type of a rune literal. */ +class RuneLiteralType extends @runeliteraltype, LiteralType, SignedIntegerType { + override string toString() { result = "rune literal" } +} + +/** The type of a float literal. */ +class FloatLiteralType extends @floatliteraltype, LiteralType, FloatType { + override string toString() { result = "float literal" } +} + +/** The type of a complex literal. */ +class ComplexLiteralType extends @complexliteraltype, LiteralType, ComplexType { + override string toString() { result = "complex literal" } +} + +/** The type of a string literal. */ +class StringLiteralType extends @stringliteraltype, LiteralType, StringType { + override string toString() { result = "string literal" } +} + +/** The type of `nil`. 
*/ +class NilLiteralType extends @nilliteraltype, LiteralType { + override string toString() { result = "nil literal" } +} + +/** A composite type, that is, not a basic type. */ +class CompositeType extends @compositetype, Type { } + +/** An array type. */ +class ArrayType extends @arraytype, CompositeType { + /** Gets the element type of this array type. */ + Type getElementType() { element_type(this, result) } + + /** Gets the length of this array type as a string. */ + string getLengthString() { array_length(this, result) } + + /** Gets the length of this array type if it can be represented as a QL integer. */ + int getLength() { result = getLengthString().toInt() } + + override Package getPackage() { result = this.getElementType().getPackage() } + + override string pp() { result = "[" + getLength() + "]" + getElementType().pp() } + + override string toString() { result = "array type" } +} + +/** A slice type. */ +class SliceType extends @slicetype, CompositeType { + /** Gets the element type of this slice type. */ + Type getElementType() { element_type(this, result) } + + override Package getPackage() { result = this.getElementType().getPackage() } + + override string pp() { result = "[]" + getElementType().pp() } + + override string toString() { result = "slice type" } +} + +/** A byte slice type */ +class ByteSliceType extends SliceType { + ByteSliceType() { this.getElementType() instanceof Uint8Type } +} + +/** A struct type. */ +class StructType extends @structtype, CompositeType { + /** + * Holds if this struct contains a field `name` with type `tp`; + * `isEmbedded` is true if the field is embedded. + * + * Note that this predicate does not take promoted fields into account. 
+ */ + predicate hasOwnField(int i, string name, Type tp, boolean isEmbedded) { + exists(string n | component_types(this, i, n, tp) | + if n = "" + then ( + isEmbedded = true and + ( + name = tp.(NamedType).getName() + or + name = tp.(PointerType).getBaseType().(NamedType).getName() + ) + ) else ( + isEmbedded = false and + name = n + ) + ) + } + + /** + * Gets a field with the name `name`; `isEmbedded` is true if the field is embedded. + * + * Note that this does not take promoted fields into account. + */ + Field getOwnField(string name, boolean isEmbedded) { + result.getDeclaringType() = this and + result.getName() = name and + this.hasOwnField(_, name, _, isEmbedded) + } + + /** + * Holds if there is an embedded field at `depth`, with either type `tp` or a pointer to `tp`. + */ + private predicate hasEmbeddedField(Type tp, int depth) { + exists(Field f | this.hasFieldCand(_, f, depth, true) | + tp = f.getType() or + tp = f.getType().(PointerType).getBaseType() + ) + } + + /** + * Gets a field of `embeddedParent`, which is then embedded into this struct type. + */ + Field getFieldOfEmbedded(Field embeddedParent, string name, int depth, boolean isEmbedded) { + // embeddedParent is a field of 'this' at depth 'depth - 1' + this.hasFieldCand(_, embeddedParent, depth - 1, true) and + // embeddedParent's type has the result field + exists(StructType embeddedType, Type fieldType | + fieldType = embeddedParent.getType().getUnderlyingType() and + pragma[only_bind_into](embeddedType) = + [fieldType, fieldType.(PointerType).getBaseType().getUnderlyingType()] + | + result = embeddedType.getOwnField(name, isEmbedded) + ) + } + + /** + * Gets a method of `embeddedParent`, which is then embedded into this struct type.
+ */ + Method getMethodOfEmbedded(Field embeddedParent, string name, int depth) { + // embeddedParent is a field of 'this' at depth 'depth - 1' + this.hasFieldCand(_, embeddedParent, depth - 1, true) and + result.getName() = name and + ( + result.getReceiverBaseType() = embeddedParent.getType() + or + result.getReceiverBaseType() = embeddedParent.getType().(PointerType).getBaseType() + or + methodhosts(result, embeddedParent.getType()) + ) + } + + private predicate hasFieldCand(string name, Field f, int depth, boolean isEmbedded) { + f = this.getOwnField(name, isEmbedded) and depth = 0 + or + not this.hasOwnField(_, name, _, _) and + f = this.getFieldOfEmbedded(_, name, depth, isEmbedded) + } + + private predicate hasMethodCand(string name, Method m, int depth) { + name = m.getName() and + exists(Type embedded | this.hasEmbeddedField(embedded, depth - 1) | + m.getReceiverType() = embedded + ) + } + + /** + * Holds if this struct contains a field `name` with type `tp`, possibly inside a (nested) + * embedded field. + */ + predicate hasField(string name, Type tp) { + exists(int mindepth | + mindepth = min(int depth | this.hasFieldCand(name, _, depth, _)) and + tp = unique(Field f | f = this.getFieldCand(name, mindepth, _)).getType() + ) + } + + private Field getFieldCand(string name, int depth, boolean isEmbedded) { + result = this.getOwnField(name, isEmbedded) and depth = 0 + or + exists(Type embedded | hasEmbeddedField(embedded, depth - 1) | + result = embedded.getUnderlyingType().(StructType).getOwnField(name, isEmbedded) + ) + } + + override Field getField(string name) { result = getFieldAtDepth(name, _) } + + /** + * Gets the field `f` with depth `depth` of this type. + * + * This includes fields promoted from an embedded field. It is not possible + * to access a field that is shadowed by a promoted field with this function. + * The number of embedded fields traversed to reach `f` is called its depth. + * The depth of a field `f` declared in this type is zero. 
+ */ + Field getFieldAtDepth(string name, int depth) { + depth = min(int depthCand | exists(getFieldCand(name, depthCand, _))) and + result = getFieldCand(name, depth, _) and + strictcount(getFieldCand(name, depth, _)) = 1 + } + + Method getMethodAtDepth(string name, int depth) { + depth = min(int depthCand | hasMethodCand(name, _, depthCand)) and + result = unique(Method m | hasMethodCand(name, m, depth)) + } + + override predicate hasMethod(string name, SignatureType tp) { + exists(int mindepth | + mindepth = min(int depth | this.hasMethodCand(name, _, depth)) and + tp = unique(Method m | this.hasMethodCand(name, m, mindepth)).getType() + ) + } + + language[monotonicAggregates] + override string pp() { + result = + "struct { " + + concat(int i, string name, Type tp | + component_types(this, i, name, tp) + | + name + " " + tp.pp(), "; " order by i + ) + " }" + } + + override string toString() { result = "struct type" } +} + +/** A pointer type. */ +class PointerType extends @pointertype, CompositeType { + /** Gets the base type of this pointer type. 
*/ + Type getBaseType() { base_type(this, result) } + + override Package getPackage() { result = this.getBaseType().getPackage() } + + override Method getMethod(string m) { + result = CompositeType.super.getMethod(m) + or + // https://golang.org/ref/spec#Method_sets: "the method set of a pointer type *T is + // the set of all methods declared with receiver *T or T" + result = getBaseType().getMethod(m) + or + // promoted methods from embedded types + exists(StructType s, Type embedded | + s = getBaseType().(NamedType).getUnderlyingType() and + s.hasOwnField(_, _, embedded, true) and + // ensure that `m` can be promoted + not s.hasOwnField(_, m, _, _) and + not exists(Method m2 | m2.getReceiverBaseType() = getBaseType() and m2.getName() = m) + | + result = embedded.getMethod(m) + or + // If S contains an embedded field T, the method set of *S includes promoted methods with receiver T or *T + not embedded instanceof PointerType and + result = embedded.getPointerType().getMethod(m) + or + // If S contains an embedded field *T, the method set of *S includes promoted methods with receiver T or *T + result = embedded.(PointerType).getBaseType().getMethod(m) + ) + } + + override string pp() { result = "* " + getBaseType().pp() } + + override string toString() { result = "pointer type" } +} + +/** An interface type. */ +class InterfaceType extends @interfacetype, CompositeType { + /** Gets the type of method `name` of this interface type. */ + Type getMethodType(string name) { component_types(this, _, name, result) } + + override predicate hasMethod(string m, SignatureType t) { t = getMethodType(m) } + + language[monotonicAggregates] + override string pp() { + result = + "interface { " + + concat(string name, Type tp | + tp = getMethodType(name) + | + name + " " + tp.pp(), "; " order by name + ) + " }" + } + + override string toString() { result = "interface type" } +} + +/** A tuple type.
*/ +class TupleType extends @tupletype, CompositeType { + /** Gets the `i`th component type of this tuple type. */ + Type getComponentType(int i) { component_types(this, i, _, result) } + + language[monotonicAggregates] + override string pp() { + result = + "(" + concat(int i, Type tp | tp = getComponentType(i) | tp.pp(), ", " order by i) + ")" + } + + override string toString() { result = "tuple type" } +} + +/** A signature type. */ +class SignatureType extends @signaturetype, CompositeType { + /** Gets the `i`th parameter type of this signature type. */ + Type getParameterType(int i) { i >= 0 and component_types(this, i + 1, _, result) } + + /** Gets the `i`th result type of this signature type. */ + Type getResultType(int i) { i >= 0 and component_types(this, -(i + 1), _, result) } + + /** Gets the number of parameters specified by this signature. */ + int getNumParameter() { result = count(int i | exists(getParameterType(i))) } + + /** Gets the number of results specified by this signature. */ + int getNumResult() { result = count(int i | exists(getResultType(i))) } + + language[monotonicAggregates] + override string pp() { + result = + "func(" + concat(int i, Type tp | tp = getParameterType(i) | tp.pp(), ", " order by i) + ") " + + concat(int i, Type tp | tp = getResultType(i) | tp.pp(), ", " order by i) + } + + override string toString() { result = "signature type" } +} + +/** A map type. */ +class MapType extends @maptype, CompositeType { + /** Gets the key type of this map type. */ + Type getKeyType() { key_type(this, result) } + + /** Gets the value type of this map type. */ + Type getValueType() { element_type(this, result) } + + override string pp() { result = "[" + getKeyType().pp() + "]" + getValueType().pp() } + + override string toString() { result = "map type" } +} + +/** A channel type. */ +class ChanType extends @chantype, CompositeType { + /** Gets the element type of this channel type. 
*/ + Type getElementType() { element_type(this, result) } + + /** Holds if this channel can send data. */ + predicate canSend() { none() } + + /** Holds if this channel can receive data. */ + predicate canReceive() { none() } +} + +/** A channel type that can only send. */ +class SendChanType extends @sendchantype, ChanType { + override predicate canSend() { any() } + + override string pp() { result = "chan<- " + getElementType().pp() } + + override string toString() { result = "send-channel type" } +} + +/** A channel type that can only receive. */ +class RecvChanType extends @recvchantype, ChanType { + override predicate canReceive() { any() } + + override string pp() { result = "<-chan " + getElementType().pp() } + + override string toString() { result = "receive-channel type" } +} + +/** A channel type that can both send and receive. */ +class SendRecvChanType extends @sendrcvchantype, ChanType { + override predicate canSend() { any() } + + override predicate canReceive() { any() } + + override string pp() { result = "chan " + getElementType().pp() } + + override string toString() { result = "send-receive-channel type" } +} + +/** A named type. */ +class NamedType extends @namedtype, CompositeType { + /** Gets the type which this type is defined to be. 
*/ + Type getBaseType() { underlying_type(this, result) } + + override Method getMethod(string m) { + result = CompositeType.super.getMethod(m) + or + methodhosts(result, this) and + result.getName() = m + or + // handle promoted methods + exists(StructType s, Type embedded | + s = getBaseType() and + s.hasOwnField(_, _, embedded, true) and + // ensure `m` can be promoted + not s.hasOwnField(_, m, _, _) and + not exists(Method m2 | m2.getReceiverType() = this and m2.getName() = m) + | + // If S contains an embedded field T, the method set of S includes promoted methods with receiver T + result = embedded.getMethod(m) + or + // If S contains an embedded field *T, the method set of S includes promoted methods with receiver T or *T + result = embedded.(PointerType).getBaseType().getMethod(m) + ) + } + + override Type getUnderlyingType() { result = getBaseType().getUnderlyingType() } +} + +/** + * A type that implements the builtin interface `error`. + */ +class ErrorType extends Type { + ErrorType() { this.implements(Builtin::error().getType().getUnderlyingType()) } +} + +/** + * Holds if `i` is the empty interface type, which is implemented by every type with a method set. + */ +pragma[noinline] +private predicate isEmptyInterface(InterfaceType i) { not i.hasMethod(_, _) } + +/** + * Gets the name of a method in the method set of `i`. + * + * This is used to restrict the set of interfaces to consider in the definition of `implements`, + * so it does not matter which method name is chosen (we use the lexicographically least). + */ +private string getExampleMethodName(InterfaceType i) { result = min(string m | i.hasMethod(m, _)) } diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/Util.qll b/repo-tests/codeql-go/ql/lib/semmle/go/Util.qll new file mode 100644 index 00000000000..ff597cdcc28 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/Util.qll @@ -0,0 +1,18 @@ +/** This module provides general utility classes and predicates. */ + +/** + * A Boolean value. 
+ * + * This is a self-binding convenience wrapper for `boolean`. + */ +class Boolean extends boolean { + Boolean() { this = true or this = false } +} + +/** + * Gets a regexp pattern that matches common top-level domain names. + */ +string commonTLD() { + // according to ranking by http://google.com/search?q=site:.<> + result = "(?:com|org|edu|gov|uk|net|io)(?![a-z0-9])" +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/VariableWithFields.qll b/repo-tests/codeql-go/ql/lib/semmle/go/VariableWithFields.qll new file mode 100644 index 00000000000..adb5e2b308a --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/VariableWithFields.qll @@ -0,0 +1,198 @@ +/** Provides the `VariableWithFields` class, for working with variables with a chain of field or element accesses chained to it. */ + +import go + +private newtype TVariableWithFields = + TVariableRoot(Variable v) or + TVariableFieldStep(VariableWithFields base, Field f) { + exists(fieldAccessPathAux(base, f)) or exists(fieldWriteAccessPathAux(base, f)) + } or + TVariableElementStep(VariableWithFields base, string e) { + exists(elementAccessPathAux(base, e)) or exists(elementWriteAccessPathAux(base, e)) + } + +/** + * Gets a representation of the write target `wt` as a variable with fields value if there is one. + */ +private TVariableWithFields writeAccessPath(IR::WriteTarget wt) { + exists(Variable v | wt = v.getAWrite().getLhs() | result = TVariableRoot(v)) + or + exists(VariableWithFields base, Field f | wt = fieldWriteAccessPathAux(base, f) | + result = TVariableFieldStep(base, f) + ) + or + exists(VariableWithFields base, string e | wt = elementWriteAccessPathAux(base, e) | + result = TVariableElementStep(base, e) + ) +} + +/** + * Gets a representation of `insn` as a variable with fields value if there is one. 
+ */ +private TVariableWithFields accessPath(IR::Instruction insn) { + exists(Variable v | insn = v.getARead().asInstruction() | result = TVariableRoot(v)) + or + exists(VariableWithFields base, Field f | insn = fieldAccessPathAux(base, f) | + result = TVariableFieldStep(base, f) + ) + or + exists(VariableWithFields base, string e | insn = elementAccessPathAux(base, e) | + result = TVariableElementStep(base, e) + ) +} + +/** + * Gets an IR instruction that reads a field `f` from a node that is represented + * by variable with fields value `base`. + */ +private IR::Instruction fieldAccessPathAux(TVariableWithFields base, Field f) { + exists(IR::FieldReadInstruction fr, IR::Instruction frb | + fr.getBase() = frb or + fr.getBase() = IR::implicitDerefInstruction(frb.(IR::EvalInstruction).getExpr()) + | + base = accessPath(frb) and + f = fr.getField() and + result = fr + ) +} + +/** + * Gets an IR write target that represents a field `f` from a node that is represented + * by variable with fields value `base`. + */ +private IR::WriteTarget fieldWriteAccessPathAux(TVariableWithFields base, Field f) { + exists(IR::FieldTarget ft, IR::Instruction ftb | + ft.getBase() = ftb or + ft.getBase() = IR::implicitDerefInstruction(ftb.(IR::EvalInstruction).getExpr()) + | + base = accessPath(ftb) and + ft.getField() = f and + result = ft + ) +} + +/** + * Gets an IR instruction that reads an element `e` from a node that is represented + * by variable with fields value `base`. + */ +private IR::Instruction elementAccessPathAux(TVariableWithFields base, string e) { + exists(IR::ElementReadInstruction er, IR::EvalInstruction erb | + er.getBase() = erb or + er.getBase() = IR::implicitDerefInstruction(erb.getExpr()) + | + base = accessPath(erb) and + e = er.getIndex().getExactValue() and + result = er + ) +} + +/** + * Gets an IR write target that represents an element `e` from a node that is represented + * by variable with fields value `base`. 
+ */ +private IR::WriteTarget elementWriteAccessPathAux(TVariableWithFields base, string e) { + exists(IR::ElementTarget et, IR::EvalInstruction etb | + et.getBase() = etb or + et.getBase() = IR::implicitDerefInstruction(etb.getExpr()) + | + base = accessPath(etb) and + e = et.getIndex().getExactValue() and + result = et + ) +} + +/** A variable with zero or more fields or elements read from it. */ +class VariableWithFields extends TVariableWithFields { + /** + * Gets the variable corresponding to the base of this variable with fields. + * + * For example, the variable corresponding to `a` for the variable with fields + * corresponding to `a.b[c]`. + */ + Variable getBaseVariable() { this.getParent*() = TVariableRoot(result) } + + /** + * Gets the variable with fields corresponding to the parent of this variable with fields. + * + * For example, the variable with fields corresponding to `a.b` for the variable with fields + * corresponding to `a.b[c]`. + */ + VariableWithFields getParent() { + exists(VariableWithFields base | + this = TVariableFieldStep(base, _) or this = TVariableElementStep(base, _) + | + result = base + ) + } + + /** Gets a use that refers to this variable with fields. */ + DataFlow::Node getAUse() { this = accessPath(result.asInstruction()) } + + /** Gets the type of this variable with fields. */ + Type getType() { + exists(IR::Instruction acc | this = accessPath(acc) | result = acc.getResultType()) + } + + /** Gets a textual representation of this element. */ + string toString() { + exists(Variable var | this = TVariableRoot(var) | result = "(" + var + ")") + or + exists(VariableWithFields base, Field f | this = TVariableFieldStep(base, f) | + result = base + "." + f.getName() + ) + or + exists(VariableWithFields base, string e | this = TVariableElementStep(base, e) | + result = base + "[" + e + "]" + ) + } + + /** + * Gets the qualified name of the source variable or variable and fields that this represents. 
+ * + * For example, for the variable with fields that represents the field `a.b[c]`, this would get the string + * `"a.b.c"`. + */ + string getQualifiedName() { + exists(Variable v | this = TVariableRoot(v) | result = v.getName()) + or + exists(VariableWithFields base, Field f | this = TVariableFieldStep(base, f) | + result = base.getQualifiedName() + "." + f.getName() + ) + or + exists(VariableWithFields base, string e | this = TVariableElementStep(base, e) | + result = base.getQualifiedName() + "." + e.replaceAll(".", "\\.") + ) + } + + /** + * Gets a write of this variable with fields. + */ + Write getAWrite() { this = writeAccessPath(result.getLhs()) } + + /** + * Gets the field that is the last step of this variable with fields, if any. + * + * For example, the field `c` for the variable with fields `a.b.c`. + */ + Field getField() { this = TVariableFieldStep(_, result) } + + /** + * Gets the element that this variable with fields reads, if any. + * + * For example, the string value of `c` for the variable with fields `a.b[c]`. + */ + string getElement() { this = TVariableElementStep(_, result) } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). 
+ */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.getBaseVariable().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/concepts/GeneratedFile.qll b/repo-tests/codeql-go/ql/lib/semmle/go/concepts/GeneratedFile.qll new file mode 100644 index 00000000000..c70d3104282 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/concepts/GeneratedFile.qll @@ -0,0 +1,50 @@ +/** Provides a class for generated files. */ + +import go + +/** Provides a class for generated files. */ +module GeneratedFile { + /** + * A file that has been generated. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `GeneratedFile` instead. + */ + abstract class Range extends File { } + + private string generatorCommentRegex() { + result = "Generated By\\b.*\\bDo not edit" or + result = + "This (file|class|interface|art[ei]fact) (was|is|(has been)) (?:auto[ -]?)?gener(e?)ated" or + result = "Any modifications to this file will be lost" or + result = + "This (file|class|interface|art[ei]fact) (was|is) (?:mechanically|automatically) generated" or + result = "The following code was (?:auto[ -]?)?generated (?:by|from)" or + result = "Autogenerated by Thrift" or + result = "(Code g|G)enerated from .* by ANTLR" + } + + private class CommentHeuristicGeneratedFile extends Range { + CommentHeuristicGeneratedFile() { + exists(Comment c | c.getFile() = this | + c.getText().regexpMatch("(?i).*\\b(" + concat(generatorCommentRegex(), "|") + ")\\b.*") + or + // regular expression recommended for Go code generators + // (https://golang.org/pkg/cmd/go/internal/generate/) + c.getText().regexpMatch("^\\s*Code generated .* DO NOT EDIT\\.\\s*$") + ) + } + } +} + +/** + * A file that has been generated. + * + * Extend this class to refine existing API models. 
If you want to model new APIs, + * extend `GeneratedFile::Range` instead. + */ +class GeneratedFile extends File { + GeneratedFile::Range self; + + GeneratedFile() { this = self } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/concepts/HTTP.qll b/repo-tests/codeql-go/ql/lib/semmle/go/concepts/HTTP.qll new file mode 100644 index 00000000000..3624060ffbd --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/concepts/HTTP.qll @@ -0,0 +1,380 @@ +/** + * Provides classes for working with HTTP-related concepts such as requests and responses. + */ + +import go + +/** Provides classes for modeling HTTP-related APIs. */ +module HTTP { + /** Provides a class for modeling new HTTP response-writer APIs. */ + module ResponseWriter { + /** + * A variable that is an HTTP response writer. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `HTTP::ResponseWriter` instead. + */ + abstract class Range extends Variable { + /** + * Gets a data-flow node that is a use of this response writer. + * + * Note that `PostUpdateNode`s for nodes that this predicate gets do not need to be + * included, as they are handled by the concrete `ResponseWriter`'s `getANode`. + */ + abstract DataFlow::Node getANode(); + } + } + + /** + * A variable that is an HTTP response writer. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `HTTP::ResponseWriter::Range` instead. + */ + class ResponseWriter extends Variable { + ResponseWriter::Range self; + + ResponseWriter() { this = self } + + /** Gets the body that is written in this HTTP response. */ + ResponseBody getBody() { result.getResponseWriter() = this } + + /** Gets a header write that is written in this HTTP response. */ + HeaderWrite getAHeaderWrite() { result.getResponseWriter() = this } + + /** Gets a redirect that is sent in this HTTP response. 
*/ + Redirect getARedirect() { result.getResponseWriter() = this } + + /** Gets a data-flow node that is a use of this response writer. */ + DataFlow::Node getANode() { + result = self.getANode() or + result.(DataFlow::PostUpdateNode).getPreUpdateNode() = self.getANode() + } + } + + /** Provides a class for modeling new HTTP header-write APIs. */ + module HeaderWrite { + /** + * A data-flow node that represents a write to an HTTP header. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `HTTP::HeaderWrite` instead. + */ + abstract class Range extends DataFlow::ExprNode { + /** Gets the (lower-case) name of a header set by this definition. */ + string getHeaderName() { result = this.getName().getStringValue().toLowerCase() } + + /** Gets the value of the header set by this definition. */ + string getHeaderValue() { + result = this.getValue().getStringValue() + or + result = this.getValue().getIntValue().toString() + } + + /** Holds if this header write defines the header `header` to have the value `value`. */ + predicate definesHeader(string header, string value) { + header = this.getHeaderName() and + value = this.getHeaderValue() + } + + /** + * Gets the node representing the name of the header defined by this write. + * + * Note that a `HeaderWrite` targeting a constant header (e.g. a routine that always + * sets the `Content-Type` header) may not have such a node, so callers should use + * `getHeaderName` in preference to this method. + */ + abstract DataFlow::Node getName(); + + /** Gets the node representing the value of the header defined by this write. */ + abstract DataFlow::Node getValue(); + + /** Gets the response writer associated with this header write, if any. */ + abstract ResponseWriter getResponseWriter(); + } + } + + /** + * A data-flow node that represents a write to an HTTP header. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `HTTP::HeaderWrite::Range` instead. 
+ */ + class HeaderWrite extends DataFlow::ExprNode { + HeaderWrite::Range self; + + HeaderWrite() { this = self } + + /** Gets the (lower-case) name of a header set by this definition. */ + string getHeaderName() { result = self.getHeaderName() } + + /** Gets the value of the header set by this definition. */ + string getHeaderValue() { result = self.getHeaderValue() } + + /** Holds if this header write defines the header `header` to have the value `value`. */ + predicate definesHeader(string header, string value) { self.definesHeader(header, value) } + + /** + * Gets the node representing the name of the header defined by this write. + * + * Note that a `HeaderWrite` targeting a constant header (e.g. a routine that always + * sets the `Content-Type` header) may not have such a node, so callers should use + * `getHeaderName` in preference to this method. + */ + DataFlow::Node getName() { result = self.getName() } + + /** Gets the node representing the value of the header defined by this write. */ + DataFlow::Node getValue() { result = self.getValue() } + + /** Gets the response writer associated with this header write, if any. */ + ResponseWriter getResponseWriter() { result = self.getResponseWriter() } + } + + /** A data-flow node whose value is written to an HTTP header. */ + class Header extends DataFlow::Node { + HeaderWrite hw; + + Header() { + this = hw.getName() + or + this = hw.getValue() + } + + /** Gets the response writer associated with this header write, if any. */ + ResponseWriter getResponseWriter() { result = hw.getResponseWriter() } + } + + /** A data-flow node whose value is written to the value of an HTTP header. */ + class HeaderValue extends Header { + HeaderValue() { this = hw.getValue() } + } + + /** A data-flow node whose value is written to the name of an HTTP header. */ + class HeaderName extends Header { + HeaderName() { this = hw.getName() } + } + + /** Provides a class for modeling new HTTP request-body APIs. 
*/ + module RequestBody { + /** + * An expression representing a reader whose content is written to an HTTP request body. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `HTTP::RequestBody` instead. + */ + abstract class Range extends DataFlow::Node { } + } + + /** + * An expression representing a reader whose content is written to an HTTP request body. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `HTTP::RequestBody::Range` instead. + */ + class RequestBody extends DataFlow::Node { + RequestBody::Range self; + + RequestBody() { this = self } + } + + /** Provides a class for modeling new HTTP response-body APIs. */ + module ResponseBody { + /** + * An expression which is written to an HTTP response body. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `HTTP::ResponseBody` instead. + */ + abstract class Range extends DataFlow::Node { + /** Gets the response writer associated with this response body, if any. */ + abstract ResponseWriter getResponseWriter(); + + /** Gets a content-type associated with this body. */ + string getAContentType() { + exists(HTTP::HeaderWrite hw | hw = getResponseWriter().getAHeaderWrite() | + hw.getHeaderName() = "content-type" and + result = hw.getHeaderValue() + ) + or + result = getAContentTypeNode().getStringValue() + } + + /** Gets a dataflow node for a content-type associated with this body. */ + DataFlow::Node getAContentTypeNode() { + exists(HTTP::HeaderWrite hw | hw = getResponseWriter().getAHeaderWrite() | + hw.getHeaderName() = "content-type" and + result = hw.getValue() + ) + } + } + } + + /** + * An expression which is written to an HTTP response body. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `HTTP::ResponseBody::Range` instead. 
+ */ + class ResponseBody extends DataFlow::Node { + ResponseBody::Range self; + + ResponseBody() { this = self } + + /** Gets the response writer associated with this response body, if any. */ + ResponseWriter getResponseWriter() { result = self.getResponseWriter() } + + /** Gets a content-type associated with this body. */ + string getAContentType() { result = self.getAContentType() } + + /** Gets a dataflow node for a content-type associated with this body. */ + DataFlow::Node getAContentTypeNode() { result = self.getAContentTypeNode() } + } + + /** Provides a class for modeling new HTTP template response-body APIs. */ + module TemplateResponseBody { + /** + * An expression which is written to an HTTP response body via a template execution. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `HTTP::TemplateResponseBody` instead. + */ + abstract class Range extends ResponseBody::Range { + /** Gets the read of the variable inside the template where this value is read. */ + abstract HtmlTemplate::TemplateRead getRead(); + } + } + + /** + * An expression which is written to an HTTP response body via a template execution. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `HTTP::TemplateResponseBody::Range` instead. + */ + class TemplateResponseBody extends ResponseBody { + override TemplateResponseBody::Range self; + + TemplateResponseBody() { this = self } + + /** Gets the read of the variable inside the template where this value is read. */ + HtmlTemplate::TemplateRead getRead() { result = self.getRead() } + } + + /** Provides a class for modeling new HTTP client request APIs. */ + module ClientRequest { + /** + * A call that performs a request to a URL. + * + * Example: An HTTP POST request is a client request that sends some + * `data` to a `url`, where both the headers and the body of the request + * contribute to the `data`. + * + * Extend this class to model new APIs. 
If you want to refine existing API models, + * extend `HTTP::ClientRequest` instead. + */ + abstract class Range extends DataFlow::Node { + /** + * Gets the URL of the request. + */ + abstract DataFlow::Node getUrl(); + } + } + + /** + * A call that performs a request to a URL. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `HTTP::ClientRequest::Range` instead. + */ + class ClientRequest extends DataFlow::Node { + ClientRequest::Range self; + + ClientRequest() { this = self } + + /** + * Gets the URL of the request. + */ + DataFlow::Node getUrl() { result = self.getUrl() } + } + + /** Provides a class for modeling new HTTP redirect APIs. */ + module Redirect { + /** + * An HTTP redirect. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `HTTP::Redirect` instead. + */ + abstract class Range extends DataFlow::Node { + /** Gets the data-flow node representing the URL being redirected to. */ + abstract DataFlow::Node getUrl(); + + /** Gets the response writer that this redirect is sent on, if any. */ + abstract ResponseWriter getResponseWriter(); + } + + /** + * An assignment of the HTTP Location header, which indicates the location for a + * redirect. + */ + private class LocationHeaderSet extends Range, HeaderWrite { + LocationHeaderSet() { this.getHeaderName() = "location" } + + override DataFlow::Node getUrl() { result = this.getValue() } + + override ResponseWriter getResponseWriter() { result = HeaderWrite.super.getResponseWriter() } + } + + /** + * An HTTP request attribute that is generally not attacker-controllable for + * open redirect exploits; for example, a form field submitted in a POST request. + */ + abstract class UnexploitableSource extends DataFlow::Node { } + } + + /** + * An HTTP redirect. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `HTTP::Redirect::Range` instead. 
+ */ + class Redirect extends DataFlow::Node { + Redirect::Range self; + + Redirect() { this = self } + + /** Gets the data-flow node representing the URL being redirected to. */ + DataFlow::Node getUrl() { result = self.getUrl() } + + /** Gets the response writer that this redirect is sent on, if any. */ + ResponseWriter getResponseWriter() { result = self.getResponseWriter() } + } + + /** Provides a class for modeling new HTTP handler APIs. */ + module RequestHandler { + /** + * An HTTP request handler. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `HTTP::RequestHandler` instead. + */ + abstract class Range extends DataFlow::Node { + /** Holds if `check` is a node used in a check that is tested before this handler is run. */ + abstract predicate guardedBy(DataFlow::Node check); + } + } + + /** + * An HTTP request handler. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `HTTP::RequestHandler::Range` instead. + */ + class RequestHandler extends DataFlow::Node { + RequestHandler::Range self; + + RequestHandler() { this = self } + + /** Holds if `check` is a node used in a check that is tested before this handler is run. */ + predicate guardedBy(DataFlow::Node check) { self.guardedBy(check) } + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/controlflow/BasicBlocks.qll b/repo-tests/codeql-go/ql/lib/semmle/go/controlflow/BasicBlocks.qll new file mode 100644 index 00000000000..39b7590d8a3 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/controlflow/BasicBlocks.qll @@ -0,0 +1,200 @@ +/** + * Provides classes for working with basic blocks. + */ + +import go +private import ControlFlowGraphImpl + +/** + * Holds if `nd` starts a new basic block. 
+ */ +private predicate startsBB(ControlFlow::Node nd) { + count(nd.getAPredecessor()) != 1 + or + nd.getAPredecessor().isBranch() +} + +/** + * Holds if the first node of basic block `succ` is a control flow + * successor of the last node of basic block `bb`. + */ +private predicate succBB(BasicBlock bb, BasicBlock succ) { succ = bb.getLastNode().getASuccessor() } + +/** + * Holds if the first node of basic block `bb` is a control flow + * successor of the last node of basic block `pre`. + */ +private predicate predBB(BasicBlock bb, BasicBlock pre) { succBB(pre, bb) } + +/** Holds if `bb` is an entry basic block. */ +private predicate entryBB(BasicBlock bb) { bb.getFirstNode().isEntryNode() } + +/** Holds if `bb` is an exit basic block. */ +private predicate exitBB(BasicBlock bb) { bb.getLastNode().isExitNode() } + +cached +private module Internal { + /** + * Holds if `succ` is a control flow successor of `nd` within the same basic block. + */ + private predicate intraBBSucc(ControlFlow::Node nd, ControlFlow::Node succ) { + succ = nd.getASuccessor() and + not startsBB(succ) + } + + /** + * Holds if `nd` is the `i`th node in basic block `bb`. + * + * In other words, `i` is the shortest distance from a node `bb` + * that starts a basic block to `nd` along the `intraBBSucc` relation. + */ + cached + predicate bbIndex(BasicBlock bb, ControlFlow::Node nd, int i) = + shortestDistances(startsBB/1, intraBBSucc/2)(bb, nd, i) + + cached + int bbLength(BasicBlock bb) { result = strictcount(ControlFlow::Node nd | bbIndex(bb, nd, _)) } + + cached + predicate reachableBB(BasicBlock bb) { + entryBB(bb) + or + exists(BasicBlock predBB | succBB(predBB, bb) | reachableBB(predBB)) + } +} + +private import Internal + +/** Holds if `dom` is an immediate dominator of `bb`. */ +cached +private predicate bbIDominates(BasicBlock dom, BasicBlock bb) = + idominance(entryBB/1, succBB/2)(_, dom, bb) + +/** Holds if `dom` is an immediate post-dominator of `bb`. 
*/ +cached +private predicate bbIPostDominates(BasicBlock dom, BasicBlock bb) = + idominance(exitBB/1, predBB/2)(_, dom, bb) + +/** + * A basic block, that is, a maximal straight-line sequence of control flow nodes + * without branches or joins. + * + * At the database level, a basic block is represented by its first control flow node. + */ +class BasicBlock extends TControlFlowNode { + BasicBlock() { startsBB(this) } + + /** Gets a basic block succeeding this one. */ + BasicBlock getASuccessor() { succBB(this, result) } + + /** Gets a basic block preceding this one. */ + BasicBlock getAPredecessor() { result.getASuccessor() = this } + + /** Gets a node in this block. */ + ControlFlow::Node getANode() { result = getNode(_) } + + /** Gets the node at the given position in this block. */ + ControlFlow::Node getNode(int pos) { bbIndex(this, result, pos) } + + /** Gets the first node in this block. */ + ControlFlow::Node getFirstNode() { result = this } + + /** Gets the last node in this block. */ + ControlFlow::Node getLastNode() { result = getNode(length() - 1) } + + /** Gets the length of this block. */ + int length() { result = bbLength(this) } + + /** Gets the basic block that immediately dominates this basic block. */ + ReachableBasicBlock getImmediateDominator() { bbIDominates(result, this) } + + /** Gets the innermost function or file to which this basic block belongs. */ + ControlFlow::Root getRoot() { result = getFirstNode().getRoot() } + + /** Gets a textual representation of this basic block. */ + string toString() { result = "basic block" } + + /** + * Holds if this basic block is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). 
+ */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + getFirstNode().hasLocationInfo(filepath, startline, startcolumn, _, _) and + getLastNode().hasLocationInfo(_, _, _, endline, endcolumn) + } +} + +/** + * An entry basic block, that is, a basic block whose first node is an entry node. + */ +class EntryBasicBlock extends BasicBlock { + EntryBasicBlock() { entryBB(this) } +} + +/** + * A basic block that is reachable from an entry basic block. + */ +class ReachableBasicBlock extends BasicBlock { + ReachableBasicBlock() { reachableBB(this) } + + /** + * Holds if this basic block strictly dominates `bb`. + */ + cached + predicate strictlyDominates(ReachableBasicBlock bb) { bbIDominates+(this, bb) } + + /** + * Holds if this basic block dominates `bb`. + * + * This predicate is reflexive: each reachable basic block dominates itself. + */ + predicate dominates(ReachableBasicBlock bb) { + bb = this or + strictlyDominates(bb) + } + + /** + * Holds if this basic block strictly post-dominates `bb`. + */ + cached + predicate strictlyPostDominates(ReachableBasicBlock bb) { bbIPostDominates+(this, bb) } + + /** + * Holds if this basic block post-dominates `bb`. + * + * This predicate is reflexive: each reachable basic block post-dominates itself. + */ + predicate postDominates(ReachableBasicBlock bb) { + bb = this or + strictlyPostDominates(bb) + } +} + +/** + * A reachable basic block with more than one predecessor. + */ +class ReachableJoinBlock extends ReachableBasicBlock { + ReachableJoinBlock() { getFirstNode().isJoin() } + + /** + * Holds if this basic block belongs to the dominance frontier of `b`, that is + * `b` dominates a predecessor of this block, but not this block itself. + * + * Algorithm from Cooper et al., "A Simple, Fast Dominance Algorithm" (Figure 5), + * who in turn attribute it to Ferrante et al., "The program dependence graph and + * its use in optimization". 
+ */ + predicate inDominanceFrontierOf(ReachableBasicBlock b) { + b = getAPredecessor() and not b = getImmediateDominator() + or + exists(ReachableBasicBlock prev | inDominanceFrontierOf(prev) | + b = prev.getImmediateDominator() and + not b = getImmediateDominator() + ) + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/controlflow/ControlFlowGraph.qll b/repo-tests/codeql-go/ql/lib/semmle/go/controlflow/ControlFlowGraph.qll new file mode 100644 index 00000000000..4bb09adfbad --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/controlflow/ControlFlowGraph.qll @@ -0,0 +1,290 @@ +/** + * Provides classes for working with a CFG-based program representation. + */ + +import go +private import ControlFlowGraphImpl + +/** Provides helper predicates for mapping between CFG nodes and the AST. */ +module ControlFlow { + /** A file or function with which a CFG is associated. */ + class Root extends AstNode { + Root() { exists(this.(File).getADecl()) or exists(this.(FuncDef).getBody()) } + + /** Holds if `nd` belongs to this file or function. */ + predicate isRootOf(AstNode nd) { + this = nd.getEnclosingFunction() + or + not exists(nd.getEnclosingFunction()) and + this = nd.getFile() + } + + /** Gets the synthetic entry node of the CFG for this file or function. */ + EntryNode getEntryNode() { result = ControlFlow::entryNode(this) } + + /** Gets the synthetic exit node of the CFG for this file or function. */ + ExitNode getExitNode() { result = ControlFlow::exitNode(this) } + } + + /** + * A node in the intra-procedural control-flow graph of a Go function or file. + * + * Nodes correspond to expressions and statements that compute a value or perform + * an operation (as opposed to providing syntactic structure or type information). + * + * There are also synthetic entry and exit nodes for each Go function and file + * that mark the beginning and the end, respectively, of the execution of the + * function and the loading of the file.
+ */ + class Node extends TControlFlowNode { + /** Gets a node that directly follows this one in the control-flow graph. */ + Node getASuccessor() { result = CFG::succ(this) } + + /** Gets a node that directly precedes this one in the control-flow graph. */ + Node getAPredecessor() { this = result.getASuccessor() } + + /** Holds if this is a node with more than one successor. */ + predicate isBranch() { strictcount(getASuccessor()) > 1 } + + /** Holds if this is a node with more than one predecessor. */ + predicate isJoin() { strictcount(getAPredecessor()) > 1 } + + /** Holds if this is the first control-flow node in `subtree`. */ + predicate isFirstNodeOf(AstNode subtree) { CFG::firstNode(subtree, this) } + + /** Holds if this node is the (unique) entry node of a function or file. */ + predicate isEntryNode() { this instanceof MkEntryNode } + + /** Holds if this node is the (unique) exit node of a function or file. */ + predicate isExitNode() { this instanceof MkExitNode } + + /** Gets the basic block to which this node belongs. */ + BasicBlock getBasicBlock() { result.getANode() = this } + + /** Holds if this node dominates `dominee` in the control-flow graph. */ + pragma[inline] + predicate dominatesNode(ControlFlow::Node dominee) { + exists(ReachableBasicBlock thisbb, ReachableBasicBlock dbb, int i, int j | + this = thisbb.getNode(i) and dominee = dbb.getNode(j) + | + thisbb.strictlyDominates(dbb) + or + thisbb = dbb and i <= j + ) + } + + /** Gets the innermost function or file to which this node belongs. */ + Root getRoot() { none() } + + /** Gets the file to which this node belongs. */ + File getFile() { hasLocationInfo(result.getAbsolutePath(), _, _, _, _) } + + /** + * Gets a textual representation of this control flow node. + */ + string toString() { result = "control-flow node" } + + /** + * Holds if this element is at the specified location. 
+ * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + filepath = "" and + startline = 0 and + startcolumn = 0 and + endline = 0 and + endcolumn = 0 + } + } + + /** + * A control-flow node that initializes or updates the value of a constant, a variable, + * a field, or an (array, slice, or map) element. + */ + class WriteNode extends Node { + IR::WriteInstruction self; + + WriteNode() { this = self } + + /** Gets the left-hand side of this write. */ + IR::WriteTarget getLhs() { result = self.getLhs() } + + /** Gets the right-hand side of this write. */ + DataFlow::Node getRhs() { self.getRhs() = result.asInstruction() } + + /** Holds if this node sets variable or constant `v` to `rhs`. */ + predicate writes(ValueEntity v, DataFlow::Node rhs) { self.writes(v, rhs.asInstruction()) } + + /** Holds if this node defines SSA variable `v` to be `rhs`. */ + predicate definesSsaVariable(SsaVariable v, DataFlow::Node rhs) { + self.getLhs().asSsaVariable() = v and + self.getRhs() = rhs.asInstruction() + } + + /** + * Holds if this node sets the value of field `f` on `base` (or its implicit dereference) to + * `rhs`. + * + * For example, for the assignment `x.width = newWidth`, `base` is either the data-flow node + * corresponding to `x` or (if `x` is a pointer) the data-flow node corresponding to the + * implicit dereference `*x`, `f` is the field referenced by `width`, and `rhs` is the data-flow + * node corresponding to `newWidth`. 
+ */ + predicate writesField(DataFlow::Node base, Field f, DataFlow::Node rhs) { + exists(IR::FieldTarget trg | trg = self.getLhs() | + ( + trg.getBase() = base.asInstruction() or + trg.getBase() = MkImplicitDeref(base.asExpr()) + ) and + trg.getField() = f and + self.getRhs() = rhs.asInstruction() + ) + } + + /** + * Holds if this node sets the value of element `index` on `base` (or its implicit dereference) + * to `rhs`. + * + * For example, for the assignment `xs[i] = v`, `base` is either the data-flow node + * corresponding to `xs` or (if `xs` is a pointer) the data-flow node corresponding to the + * implicit dereference `*xs`, `index` is the data-flow node corresponding to `i`, and `rhs` + * is the data-flow node corresponding to `v`. + */ + predicate writesElement(DataFlow::Node base, DataFlow::Node index, DataFlow::Node rhs) { + exists(IR::ElementTarget trg | trg = self.getLhs() | + ( + trg.getBase() = base.asInstruction() or + trg.getBase() = MkImplicitDeref(base.asExpr()) + ) and + trg.getIndex() = index.asInstruction() and + self.getRhs() = rhs.asInstruction() + ) + } + + /** + * Holds if this node sets any field or element of `base` to `rhs`. + */ + predicate writesComponent(DataFlow::Node base, DataFlow::Node rhs) { + writesElement(base, _, rhs) or writesField(base, _, rhs) + } + } + + /** + * A control-flow node recording the fact that a certain expression has a known + * Boolean value at this point in the program.
+ */ + class ConditionGuardNode extends IR::Instruction, MkConditionGuardNode { + Expr cond; + boolean outcome; + + ConditionGuardNode() { this = MkConditionGuardNode(cond, outcome) } + + private predicate ensuresAux(Expr expr, boolean b) { + expr = cond and b = outcome + or + expr = any(ParenExpr par | ensuresAux(par, b)).getExpr() + or + expr = any(NotExpr ne | ensuresAux(ne, b.booleanNot())).getOperand() + or + expr = any(LandExpr land | ensuresAux(land, true)).getAnOperand() and + b = true + or + expr = any(LorExpr lor | ensuresAux(lor, false)).getAnOperand() and + b = false + } + + /** Holds if this guard ensures that the result of `nd` is `b`. */ + predicate ensures(DataFlow::Node nd, boolean b) { + ensuresAux(any(Expr e | nd = DataFlow::exprNode(e)), b) + } + + /** Holds if this guard ensures that `lesser <= greater + bias` holds. */ + predicate ensuresLeq(DataFlow::Node lesser, DataFlow::Node greater, int bias) { + exists(DataFlow::RelationalComparisonNode rel, boolean b | + ensures(rel, b) and + rel.leq(b, lesser, greater, bias) + ) + or + ensuresEq(lesser, greater) and + bias = 0 + } + + /** Holds if this guard ensures that `i = j` holds. */ + predicate ensuresEq(DataFlow::Node i, DataFlow::Node j) { + exists(DataFlow::EqualityTestNode eq, boolean b | + ensures(eq, b) and + eq.eq(b, i, j) + ) + } + + /** Holds if this guard ensures that `i != j` holds. */ + predicate ensuresNeq(DataFlow::Node i, DataFlow::Node j) { + exists(DataFlow::EqualityTestNode eq, boolean b | + ensures(eq, b.booleanNot()) and + eq.eq(b, i, j) + ) + } + + /** + * Holds if this guard dominates basic block `bb`, that is, the guard + * is known to hold at `bb`. + */ + predicate dominates(ReachableBasicBlock bb) { + this = bb.getANode() or + dominates(bb.getImmediateDominator()) + } + + /** + * Gets the condition whose outcome the guard concerns. 
+ */ + Expr getCondition() { result = cond } + + override Root getRoot() { result.isRootOf(cond) } + + override string toString() { result = cond + " is " + outcome } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + cond.hasLocationInfo(filepath, _, _, startline, startcolumn) and + endline = startline and + endcolumn = startcolumn + } + } + + /** + * Gets the entry node of function or file `root`. + */ + Node entryNode(Root root) { result = MkEntryNode(root) } + + /** + * Gets the exit node of function or file `root`. + */ + Node exitNode(Root root) { result = MkExitNode(root) } + + /** + * Holds if the function `f` may return without panicking, exiting the process, or looping forever. + * + * This is defined conservatively, and so may also hold of a function that in fact + * cannot return normally, but never fails to hold of a function that can return normally. + */ + predicate mayReturnNormally(FuncDecl f) { CFG::mayReturnNormally(f.getBody()) } + + /** + * Holds if `pred` is the node for the case `testExpr` in an expression + * switch statement which is switching on `switchExpr`, and `succ` is the + * node to be executed next if the case test succeeds. + */ + predicate isSwitchCaseTestPassingEdge( + ControlFlow::Node pred, ControlFlow::Node succ, Expr switchExpr, Expr testExpr + ) { + CFG::isSwitchCaseTestPassingEdge(pred, succ, switchExpr, testExpr) + } +} + +class Write = ControlFlow::WriteNode; diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/controlflow/ControlFlowGraphImpl.qll b/repo-tests/codeql-go/ql/lib/semmle/go/controlflow/ControlFlowGraphImpl.qll new file mode 100644 index 00000000000..5642a853513 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/controlflow/ControlFlowGraphImpl.qll @@ -0,0 +1,2070 @@ +/** + * INTERNAL: Analyses should use module `ControlFlowGraph` instead. + * + * Provides predicates for building intra-procedural CFGs. 
+ */ + +import go + +/** A block statement that is not the body of a `switch` or `select` statement. */ +class PlainBlock extends BlockStmt { + PlainBlock() { + not this = any(SwitchStmt sw).getBody() and not this = any(SelectStmt sel).getBody() + } +} + +private predicate notBlankIdent(Expr e) { not e instanceof BlankIdent } + +private predicate pureLvalue(ReferenceExpr e) { not e.isRvalue() } + +/** + * Holds if `e` is a branch condition, including the LHS of a short-circuiting binary operator. + */ +private predicate isCondRoot(Expr e) { + e = any(LogicalBinaryExpr lbe).getLeftOperand() + or + e = any(ForStmt fs).getCond() + or + e = any(IfStmt is).getCond() + or + e = any(ExpressionSwitchStmt ess | not exists(ess.getExpr())).getACase().getAnExpr() +} + +/** + * Holds if `e` is a branch condition or part of a logical binary expression contributing to a + * branch condition. + * + * For example, in `v := (x && y) || (z && w)`, `x` and `(x && y)` and `z` are branch conditions + * (`isCondRoot` holds of them), whereas this predicate also holds of `y` (contributes to condition + * `x && y`) but not of `w` (contributes to the value `v`, but not to any branch condition). + * + * In the context `if (x && y) || (z && w)` then the whole `(x && y) || (z && w)` is a branch condition + * as well as `x` and `(x && y)` and `z` as previously, and this predicate holds of all their + * subexpressions. + */ +private predicate isCond(Expr e) { + isCondRoot(e) or + e = any(LogicalBinaryExpr lbe | isCond(lbe)).getRightOperand() or + e = any(ParenExpr par | isCond(par)).getExpr() +} + +/** + * Holds if `e` implicitly reads the embedded field `implicitField`. + * + * The `index` is the distance from the promoted field. For example, if `A` contains an embedded + * field `B`, `B` contains an embedded field `C` and `C` contains the non-embedded field `x`. + * Then `a.x` implicitly reads `C` with index 1 and `B` with index 2. 
+ */ +private predicate implicitFieldSelectionForField(PromotedSelector e, int index, Field implicitField) { + exists(StructType baseType, PromotedField child, int implicitFieldDepth | + baseType = e.getSelectedStructType() and + ( + e.refersTo(child) + or + implicitFieldSelectionForField(e, implicitFieldDepth + 1, child) + ) + | + child = baseType.getFieldOfEmbedded(implicitField, _, implicitFieldDepth + 1, _) and + exists(PromotedField explicitField, int explicitFieldDepth | + e.refersTo(explicitField) and baseType.getFieldAtDepth(_, explicitFieldDepth) = explicitField + | + index = explicitFieldDepth - implicitFieldDepth + ) + ) +} + +private predicate implicitFieldSelectionForMethod(PromotedSelector e, int index, Field implicitField) { + exists(StructType baseType, PromotedMethod method, int mDepth, int implicitFieldDepth | + baseType = e.getSelectedStructType() and + e.refersTo(method) and + baseType.getMethodAtDepth(_, mDepth) = method and + index = mDepth - implicitFieldDepth + | + method = baseType.getMethodOfEmbedded(implicitField, _, implicitFieldDepth + 1) + or + exists(PromotedField child | + child = baseType.getFieldOfEmbedded(implicitField, _, implicitFieldDepth + 1, _) and + implicitFieldSelectionForMethod(e, implicitFieldDepth + 1, child) + ) + ) +} + +/** + * A node in the intra-procedural control-flow graph of a Go function or file. + * + * There are two kinds of control-flow nodes: + * + * 1. Instructions: these are nodes that correspond to expressions and statements + * that compute a value or perform an operation (as opposed to providing syntactic + * structure or type information). + * 2. Synthetic nodes: + * - Entry and exit nodes for each Go function and file that mark the beginning and the end, + * respectively, of the execution of the function and the loading of the file; + * - Skip nodes that are semantic no-ops, but make CFG construction easier. 
+ */ +cached +newtype TControlFlowNode = + /** + * A control-flow node that represents the evaluation of an expression. + */ + MkExprNode(Expr e) { CFG::hasEvaluationNode(e) } or + /** + * A control-flow node that represents the initialization of an element of a composite literal. + */ + MkLiteralElementInitNode(Expr e) { e = any(CompositeLit lit).getAnElement() } or + /** + * A control-flow node that represents the implicit index of an element in a slice or array literal. + */ + MkImplicitLiteralElementIndex(Expr e) { + exists(CompositeLit lit | not lit instanceof StructLit | + e = lit.getAnElement() and + not e instanceof KeyValueExpr + ) + } or + /** + * A control-flow node that represents a (single) assignment. + * + * Assignments with multiple left-hand sides are split up into multiple assignment nodes, + * one for each left-hand side. Assignments to `_` are not represented in the control-flow graph. + */ + MkAssignNode(AstNode assgn, int i) { + // the `i`th assignment in a (possibly multi-)assignment + notBlankIdent(assgn.(Assignment).getLhs(i)) + or + // the `i`th name declared in a (possibly multi-)declaration specifier + notBlankIdent(assgn.(ValueSpec).getNameExpr(i)) + or + // the assignment to the "key" variable in a `range` statement + notBlankIdent(assgn.(RangeStmt).getKey()) and i = 0 + or + // the assignment to the "value" variable in a `range` statement + notBlankIdent(assgn.(RangeStmt).getValue()) and i = 1 + } or + /** + * A control-flow node that represents the implicit right-hand side of a compound assignment. + * + * For example, the compound assignment `x += 1` has an implicit right-hand side `x + 1`. + */ + MkCompoundAssignRhsNode(CompoundAssignStmt assgn) or + /** + * A control-flow node that represents the `i`th component of a tuple expression `s`. 
+ */ + MkExtractNode(AstNode s, int i) { + // in an assignment `x, y, z = tuple` + exists(Assignment assgn | + s = assgn and + exists(assgn.getRhs()) and + assgn.getNumLhs() > 1 and + exists(assgn.getLhs(i)) + ) + or + // in a declaration `var x, y, z = tuple` + exists(ValueSpec spec | + s = spec and + exists(spec.getInit()) and + spec.getNumName() > 1 and + exists(spec.getNameExpr(i)) + ) + or + // in a `range` statement + exists(RangeStmt rs | s = rs | + exists(rs.getKey()) and i = 0 + or + exists(rs.getValue()) and i = 1 + ) + or + // in a return statement `return f()` where `f` has multiple return values + exists(ReturnStmt ret, SignatureType rettp | + s = ret and + // the return statement has a single expression + exists(ret.getExpr()) and + // but the enclosing function has multiple results + rettp = ret.getEnclosingFunction().getType() and + rettp.getNumResult() > 1 and + exists(rettp.getResultType(i)) + ) + or + // in a call `f(g())` where `g` has multiple return values + exists(CallExpr outer, CallExpr inner | s = outer | + inner = outer.getArgument(0).stripParens() and + outer.getNumArgument() = 1 and + exists(inner.getType().(TupleType).getComponentType(i)) + ) + } or + /** + * A control-flow node that represents the zero value to which a variable without an initializer + * expression is initialized. + */ + MkZeroInitNode(ValueEntity v) { + exists(ValueSpec spec, int i | + not exists(spec.getAnInit()) and + spec.getNameExpr(i) = v.getDeclaration() + ) + or + exists(v.(ResultVariable).getFunction().getBody()) + } or + /** + * A control-flow node that represents a function declaration. + */ + MkFuncDeclNode(FuncDecl fd) or + /** + * A control-flow node that represents a `defer` statement. + */ + MkDeferNode(DeferStmt def) or + /** + * A control-flow node that represents a `go` statement. + */ + MkGoNode(GoStmt go) or + /** + * A control-flow node that represents the fact that `e` is known to evaluate to + * `outcome`. 
+ */ + MkConditionGuardNode(Expr e, Boolean outcome) { isCondRoot(e) } or + /** + * A control-flow node that represents an increment or decrement statement. + */ + MkIncDecNode(IncDecStmt ids) or + /** + * A control-flow node that represents the implicit right-hand side of an increment or decrement statement. + */ + MkIncDecRhs(IncDecStmt ids) or + /** + * A control-flow node that represents the implicit operand 1 of an increment or decrement statement. + */ + MkImplicitOne(IncDecStmt ids) or + /** + * A control-flow node that represents a return from a function. + */ + MkReturnNode(ReturnStmt ret) or + /** + * A control-flow node that represents the implicit write to a named result variable in a return statement. + */ + MkResultWriteNode(ResultVariable var, int i, ReturnStmt ret) { + ret.getEnclosingFunction().getResultVar(i) = var and + exists(ret.getAnExpr()) + } or + /** + * A control-flow node that represents the implicit read of a named result variable upon returning from + * a function (after any deferred calls have been executed). + */ + MkResultReadNode(ResultVariable var) or + /** + * A control-flow node that represents a no-op. + * + * These control-flow nodes correspond to Go statements that have no runtime semantics other than potentially + * influencing control flow: the branching statements `continue`, `break`, `fallthrough` and `goto`; empty + * blocks; empty statements; and import and type declarations. + */ + MkSkipNode(AstNode skip) { + skip instanceof BranchStmt + or + skip instanceof EmptyStmt + or + skip.(PlainBlock).getNumStmt() = 0 + or + skip instanceof ImportDecl + or + skip instanceof TypeDecl + or + pureLvalue(skip) + or + skip.(CaseClause).getNumStmt() = 0 + or + skip.(CommClause).getNumStmt() = 0 + } or + /** + * A control-flow node that represents a `select` operation. + */ + MkSelectNode(SelectStmt sel) or + /** + * A control-flow node that represents a `send` operation. 
+ */ + MkSendNode(SendStmt send) or + /** + * A control-flow node that represents the initialization of a parameter to its corresponding argument. + */ + MkParameterInit(Parameter parm) { exists(parm.getFunction().getBody()) } or + /** + * A control-flow node that represents the argument corresponding to a parameter. + */ + MkArgumentNode(Parameter parm) { exists(parm.getFunction().getBody()) } or + /** + * A control-flow node that represents the initialization of a result variable to its zero value. + */ + MkResultInit(ResultVariable rv) { exists(rv.getFunction().getBody()) } or + /** + * A control-flow node that represents the operation of retrieving the next (key, value) pair in a + * `range` statement, if any. + */ + MkNextNode(RangeStmt rs) or + /** + * A control-flow node that represents the implicit `true` expression in `switch { ... }`. + */ + MkImplicitTrue(ExpressionSwitchStmt stmt) { not exists(stmt.getExpr()) } or + /** + * A control-flow node that represents the implicit comparison or type check performed by + * the `i`th expression of a case clause `cc`. + */ + MkCaseCheckNode(CaseClause cc, int i) { exists(cc.getExpr(i)) } or + /** + * A control-flow node that represents the implicit lower bound of a slice expression. + */ + MkImplicitLowerSliceBound(SliceExpr sl) { not exists(sl.getLow()) } or + /** + * A control-flow node that represents the implicit upper bound of a simple slice expression. + */ + MkImplicitUpperSliceBound(SliceExpr sl) { not exists(sl.getHigh()) } or + /** + * A control-flow node that represents the implicit max bound of a simple slice expression. + */ + MkImplicitMaxSliceBound(SliceExpr sl) { not exists(sl.getMax()) } or + /** + * A control-flow node that represents the implicit dereference of the base in a field/method + * access, element access, or slice expression. 
+ */ + MkImplicitDeref(Expr e) { + e.getType().getUnderlyingType() instanceof PointerType and + ( + exists(SelectorExpr sel | e = sel.getBase() | + // field accesses through a pointer always implicitly dereference + sel = any(Field f).getAReference() + or + // method accesses only dereference if the receiver is _not_ a pointer + exists(Method m, Type tp | + sel = m.getAReference() and + tp = m.getReceiver().getType().getUnderlyingType() and + not tp instanceof PointerType + ) + ) + or + e = any(IndexExpr ie).getBase() + or + e = any(SliceExpr se).getBase() + ) + } or + /** + * A control-flow node that represents the implicit selection of a field when + * accessing a promoted field. + * + * If that field has a pointer type then this control-flow node also + * represents an implicit dereference of it. + */ + MkImplicitFieldSelection(PromotedSelector e, int i, Field implicitField) { + implicitFieldSelectionForField(e, i, implicitField) or + implicitFieldSelectionForMethod(e, i, implicitField) + } or + /** + * A control-flow node that represents the start of the execution of a function or file. + */ + MkEntryNode(ControlFlow::Root root) or + /** + * A control-flow node that represents the end of the execution of a function or file. + */ + MkExitNode(ControlFlow::Root root) + +/** A representation of the target of a write. */ +newtype TWriteTarget = + /** A write target that is represented explicitly in the AST. 
*/ + MkLhs(TControlFlowNode write, Expr lhs) { + exists(AstNode assgn, int i | write = MkAssignNode(assgn, i) | + lhs = assgn.(Assignment).getLhs(i).stripParens() + or + lhs = assgn.(ValueSpec).getNameExpr(i) + or + exists(RangeStmt rs | rs = assgn | + i = 0 and lhs = rs.getKey().stripParens() + or + i = 1 and lhs = rs.getValue().stripParens() + ) + ) + or + exists(IncDecStmt ids | write = MkIncDecNode(ids) | lhs = ids.getOperand().stripParens()) + or + exists(Parameter parm | write = MkParameterInit(parm) | lhs = parm.getDeclaration()) + or + exists(ResultVariable res | write = MkResultInit(res) | lhs = res.getDeclaration()) + } or + /** A write target for an element in a compound literal, viewed as a field write. */ + MkLiteralElementTarget(MkLiteralElementInitNode elt) or + /** A write target for a returned expression, viewed as a write to the corresponding result variable. */ + MkResultWriteTarget(MkResultWriteNode w) + +/** + * A control-flow node that represents a no-op. + * + * These control-flow nodes correspond to Go statements that have no runtime semantics other than + * potentially influencing control flow: the branching statements `continue`, `break`, + * `fallthrough` and `goto`; empty blocks; empty statements; and import and type declarations. + */ +class SkipNode extends ControlFlow::Node, MkSkipNode { + AstNode skip; + + SkipNode() { this = MkSkipNode(skip) } + + override ControlFlow::Root getRoot() { result.isRootOf(skip) } + + override string toString() { result = "skip" } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + skip.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** + * A control-flow node that represents the start of the execution of a function or file. 
+ */ +class EntryNode extends ControlFlow::Node, MkEntryNode { + ControlFlow::Root root; + + EntryNode() { this = MkEntryNode(root) } + + override ControlFlow::Root getRoot() { result = root } + + override string toString() { result = "entry" } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + root.hasLocationInfo(filepath, startline, startcolumn, _, _) and + endline = startline and + endcolumn = startcolumn + } +} + +/** + * A control-flow node that represents the end of the execution of a function or file. + */ +class ExitNode extends ControlFlow::Node, MkExitNode { + ControlFlow::Root root; + + ExitNode() { this = MkExitNode(root) } + + override ControlFlow::Root getRoot() { result = root } + + override string toString() { result = "exit" } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + root.hasLocationInfo(filepath, _, _, endline, endcolumn) and + endline = startline and + endcolumn = startcolumn + } +} + +/** + * Provides classes and predicates for computing the control-flow graph. + */ +cached +module CFG { + /** + * The target of a branch statement, which is either the label of a labeled statement or + * the special target `""` referring to the innermost enclosing loop or `switch`. + */ + private class BranchTarget extends string { + BranchTarget() { this = any(LabeledStmt ls).getLabel() or this = "" } + } + + private module BranchTarget { + /** Holds if this is the target of branch statement `stmt` or the label of compound statement `stmt`. 
*/ + BranchTarget of(Stmt stmt) { + exists(BranchStmt bs | bs = stmt | + result = bs.getLabel() + or + not exists(bs.getLabel()) and result = "" + ) + or + exists(LabeledStmt ls | stmt = ls.getStmt() | result = ls.getLabel()) + or + (stmt instanceof LoopStmt or stmt instanceof SwitchStmt or stmt instanceof SelectStmt) and + result = "" + } + } + + private newtype TCompletion = + /** A completion indicating that an expression or statement was evaluated successfully. */ + Done() or + /** + * A completion indicating that an expression was successfully evaluated to Boolean value `b`. + * + * Note that many Boolean expressions are modelled as having completion `Done()` instead. + * Completion `Bool` is only used in contexts where the Boolean value can be determined. + */ + Bool(boolean b) { b = true or b = false } or + /** + * A completion indicating that execution of a (compound) statement ended with a `break` + * statement targeting the given label. + */ + Break(BranchTarget lbl) or + /** + * A completion indicating that execution of a (compound) statement ended with a `continue` + * statement targeting the given label. + */ + Continue(BranchTarget lbl) or + /** + * A completion indicating that execution of a (compound) statement ended with a `fallthrough` + * statement. + */ + Fallthrough() or + /** + * A completion indicating that execution of a (compound) statement ended with a `return` + * statement. + */ + Return() or + /** + * A completion indicating that execution of a statement or expression may have ended with + * a panic being raised. 
+ */ + Panic() + + private Completion normalCompletion() { result.isNormal() } + + private class Completion extends TCompletion { + predicate isNormal() { this = Done() or this = Bool(_) } + + Boolean getOutcome() { this = Done() or this = Bool(result) } + + string toString() { + this = Done() and result = "normal" + or + exists(boolean b | this = Bool(b) | result = b.toString()) + or + exists(BranchTarget lbl | + this = Break(lbl) and result = "break " + lbl + or + this = Continue(lbl) and result = "continue " + lbl + ) + or + this = Fallthrough() and result = "fallthrough" + or + this = Return() and result = "return" + or + this = Panic() and result = "panic" + } + } + + /** + * Holds if `e` should have an evaluation node in the control-flow graph. + * + * Excluded expressions include those not evaluated at runtime (e.g. identifiers, type expressions) + * and some logical expressions that are expressed as control-flow edges rather than having a specific + * evaluation node. + */ + cached + predicate hasEvaluationNode(Expr e) { + // exclude expressions that do not denote a value + not e instanceof TypeExpr and + not e = any(FieldDecl f).getTag() and + not e instanceof KeyValueExpr and + not e = any(SelectorExpr sel).getSelector() and + not e = any(StructLit sl).getKey(_) and + not (e instanceof Ident and not e instanceof ReferenceExpr) and + not (e instanceof SelectorExpr and not e instanceof ReferenceExpr) and + not pureLvalue(e) and + // exclude parentheses, which are purely concrete syntax, and some logical binary expressions + // whose evaluation is implied by control-flow edges without requiring an evaluation node. 
+ not isControlFlowStructural(e) and + // exclude expressions that are not evaluated at runtime + not e = any(ImportSpec is).getPathExpr() and + not e.getParent*() = any(ArrayTypeExpr ate).getLength() and + // sub-expressions of constant expressions are not evaluated (even if they don't look constant + // themselves) + not constRoot(e.getParent+()) + } + + /** + * Holds if `e` is an expression that purely serves grouping or control-flow purposes. + * + * Examples include parenthesized expressions and short-circuiting Boolean expressions used within + * a branch condition (`if` or `for` condition, or as part of a larger boolean expression, e.g. + * in `(x && y) || z`, the `&&` subexpression matches this predicate). + */ + private predicate isControlFlowStructural(Expr e) { + // Some logical binary operators do not need an evaluation node + // (for example, in `if x && y`, we evaluate `x` and then branch straight to either `y` or the + // `else` block, so there is no control-flow step where `x && y` is specifically calculated) + e instanceof LogicalBinaryExpr and + isCond(e) + or + // Purely concrete-syntactic structural expression: + e instanceof ParenExpr + } + + /** + * Holds if `root` is a constant root, that is, an expression that is constant but whose parent expression is not. + * + * As an exception to the latter, for a control-flow structural expression such as `(c1)` or `c1 && c2` + * where `cn` are constants we still consider the `cn`s to be constant roots, even though their parent + * expression is also constant. + */ + private predicate constRoot(Expr root) { + exists(Expr c | + c.isConst() and + not c.getParent().(Expr).isConst() and + root = stripStructural(c) + ) + } + + /** + * Strips off any control-flow structural components from `e`.
+ */ + private Expr stripStructural(Expr e) { + if isControlFlowStructural(e) then result = stripStructural(e.getAChildExpr()) else result = e + } + + private class ControlFlowTree extends AstNode { + predicate firstNode(ControlFlow::Node first) { none() } + + predicate lastNode(ControlFlow::Node last, Completion cmpl) { + // propagate abnormal completion from children + lastNode(this.getAChild(), last, cmpl) and + not cmpl.isNormal() + } + + predicate succ(ControlFlow::Node pred, ControlFlow::Node succ) { + exists(int i | + lastNode(getChildTreeRanked(i), pred, normalCompletion()) and + firstNode(getChildTreeRanked(i + 1), succ) + ) + } + + final ControlFlowTree getChildTreeRanked(int i) { + exists(int j | + result = getChildTree(j) and + j = rank[i + 1](int k | exists(getChildTree(k))) + ) + } + + ControlFlowTree getFirstChildTree() { result = getChildTreeRanked(0) } + + ControlFlowTree getLastChildTree() { + result = max(ControlFlowTree ch, int j | ch = getChildTree(j) | ch order by j) + } + + ControlFlowTree getChildTree(int i) { none() } + } + + private class AtomicTree extends ControlFlowTree { + ControlFlow::Node nd; + Completion cmpl; + + AtomicTree() { + exists(Expr e | + e = this.(Expr) and + e.isConst() and + nd = mkExprOrSkipNode(this) + | + if e.isPlatformIndependentConstant() and exists(e.getBoolValue()) + then cmpl = Bool(e.getBoolValue()) + else cmpl = Done() + ) + or + this instanceof Ident and + not this.(Expr).isConst() and + nd = mkExprOrSkipNode(this) and + cmpl = Done() + or + this instanceof BreakStmt and + nd = MkSkipNode(this) and + cmpl = Break(BranchTarget::of(this)) + or + this instanceof ContinueStmt and + nd = MkSkipNode(this) and + cmpl = Continue(BranchTarget::of(this)) + or + this instanceof Decl and + nd = MkSkipNode(this) and + cmpl = Done() + or + this instanceof EmptyStmt and + nd = MkSkipNode(this) and + cmpl = Done() + or + this instanceof FallthroughStmt and + nd = MkSkipNode(this) and + cmpl = Fallthrough() + or + this 
instanceof FuncLit and + nd = MkExprNode(this) and + cmpl = Done() + or + this instanceof PlainBlock and + nd = MkSkipNode(this) and + cmpl = Done() + or + this instanceof SelectorExpr and + not this.(SelectorExpr).getBase() instanceof ValueExpr and + nd = mkExprOrSkipNode(this) and + cmpl = Done() + } + + override predicate firstNode(ControlFlow::Node first) { first = nd } + + override predicate lastNode(ControlFlow::Node last, Completion c) { last = nd and c = cmpl } + } + + abstract private class PostOrderTree extends ControlFlowTree { + abstract ControlFlow::Node getNode(); + + Completion getCompletion() { result = Done() } + + override predicate firstNode(ControlFlow::Node first) { + firstNode(getFirstChildTree(), first) + or + not exists(getChildTree(_)) and + first = getNode() + } + + override predicate lastNode(ControlFlow::Node last, Completion cmpl) { + super.lastNode(last, cmpl) + or + last = getNode() and cmpl = getCompletion() + } + + override predicate succ(ControlFlow::Node pred, ControlFlow::Node succ) { + super.succ(pred, succ) + or + lastNode(getLastChildTree(), pred, normalCompletion()) and + succ = getNode() + } + } + + abstract private class PreOrderTree extends ControlFlowTree { + abstract ControlFlow::Node getNode(); + + override predicate firstNode(ControlFlow::Node first) { first = getNode() } + + override predicate lastNode(ControlFlow::Node last, Completion cmpl) { + super.lastNode(last, cmpl) + or + lastNode(getLastChildTree(), last, cmpl) + or + not exists(getChildTree(_)) and + last = getNode() and + cmpl = Done() + } + + override predicate succ(ControlFlow::Node pred, ControlFlow::Node succ) { + super.succ(pred, succ) + or + pred = getNode() and + firstNode(getFirstChildTree(), succ) + } + } + + private class WrapperTree extends ControlFlowTree { + WrapperTree() { + this instanceof ConstDecl or + this instanceof DeclStmt or + this instanceof ExprStmt or + this instanceof KeyValueExpr or + this instanceof LabeledStmt or + this 
instanceof ParenExpr or + this instanceof PlainBlock or + this instanceof VarDecl + } + + override predicate firstNode(ControlFlow::Node first) { firstNode(getFirstChildTree(), first) } + + override predicate lastNode(ControlFlow::Node last, Completion cmpl) { + super.lastNode(last, cmpl) + or + lastNode(getLastChildTree(), last, cmpl) + or + exists(LoopStmt ls | this = ls.getBody() | + lastNode(this, last, Continue(BranchTarget::of(ls))) and + cmpl = Done() + ) + } + + override ControlFlowTree getChildTree(int i) { + i = 0 and result = this.(DeclStmt).getDecl() + or + i = 0 and result = this.(ExprStmt).getExpr() + or + result = this.(GenDecl).getSpec(i) + or + exists(KeyValueExpr kv | kv = this | + not kv.getLiteral() instanceof StructLit and + i = 0 and + result = kv.getKey() + or + i = 1 and result = kv.getValue() + ) + or + i = 0 and result = this.(LabeledStmt).getStmt() + or + i = 0 and result = this.(ParenExpr).getExpr() + or + result = this.(PlainBlock).getStmt(i) + } + } + + private class AssignmentTree extends ControlFlowTree { + AssignmentTree() { + this instanceof Assignment or + this instanceof ValueSpec + } + + Expr getLhs(int i) { + result = this.(Assignment).getLhs(i) or + result = this.(ValueSpec).getNameExpr(i) + } + + int getNumLhs() { + result = this.(Assignment).getNumLhs() or + result = this.(ValueSpec).getNumName() + } + + Expr getRhs(int i) { + result = this.(Assignment).getRhs(i) or + result = this.(ValueSpec).getInit(i) + } + + int getNumRhs() { + result = this.(Assignment).getNumRhs() or + result = this.(ValueSpec).getNumInit() + } + + predicate isExtractingAssign() { getNumRhs() = 1 and getNumLhs() > 1 } + + override predicate firstNode(ControlFlow::Node first) { + not this instanceof RecvStmt and + firstNode(getLhs(0), first) + } + + override predicate lastNode(ControlFlow::Node last, Completion cmpl) { + ControlFlowTree.super.lastNode(last, cmpl) + or + ( + last = max(int i | | epilogueNode(i) order by i) + or + not 
exists(epilogueNode(_)) and + lastNode(getLastSubExprInEvalOrder(), last, normalCompletion()) + ) and + cmpl = Done() + } + + override predicate succ(ControlFlow::Node pred, ControlFlow::Node succ) { + ControlFlowTree.super.succ(pred, succ) + or + exists(int i | lastNode(getLhs(i), pred, normalCompletion()) | + firstNode(getLhs(i + 1), succ) + or + not this instanceof RecvStmt and + i = getNumLhs() - 1 and + ( + firstNode(getRhs(0), succ) + or + not exists(getRhs(_)) and + succ = epilogueNodeRanked(0) + ) + ) + or + exists(int i | + lastNode(getRhs(i), pred, normalCompletion()) and + firstNode(getRhs(i + 1), succ) + ) + or + not this instanceof RecvStmt and + lastNode(getRhs(getNumRhs() - 1), pred, normalCompletion()) and + succ = epilogueNodeRanked(0) + or + exists(int i | + pred = epilogueNodeRanked(i) and + succ = epilogueNodeRanked(i + 1) + ) + } + + ControlFlow::Node epilogueNodeRanked(int i) { + exists(int j | + result = epilogueNode(j) and + j = rank[i + 1](int k | exists(epilogueNode(k))) + ) + } + + private Expr getSubExprInEvalOrder(int evalOrder) { + if evalOrder < getNumLhs() + then result = getLhs(evalOrder) + else result = getRhs(evalOrder - getNumLhs()) + } + + private Expr getLastSubExprInEvalOrder() { + result = max(int i | | getSubExprInEvalOrder(i) order by i) + } + + private ControlFlow::Node epilogueNode(int i) { + i = -1 and + result = MkCompoundAssignRhsNode(this) + or + exists(int j | + result = MkExtractNode(this, j) and + i = 2 * j + or + result = MkZeroInitNode(any(ValueEntity v | getLhs(j) = v.getDeclaration())) and + i = 2 * j + or + result = MkAssignNode(this, j) and + i = 2 * j + 1 + ) + } + } + + private class BinaryExprTree extends PostOrderTree, BinaryExpr { + override ControlFlow::Node getNode() { result = MkExprNode(this) } + + private predicate equalityTestMayPanic() { + this instanceof EqualityTestExpr and + exists(Type t | + t = this.getAnOperand().getType().getUnderlyingType() and + ( + t instanceof InterfaceType or // panic 
due to comparison of incomparable interface values + t instanceof StructType or // may contain an interface-typed field + t instanceof ArrayType // may be an array of interface values + ) + ) + } + + override Completion getCompletion() { + result = PostOrderTree.super.getCompletion() + or + // runtime panic due to division by zero or comparison of incomparable interface values + (this instanceof DivExpr or equalityTestMayPanic()) and + not this.(Expr).isConst() and + result = Panic() + } + + override ControlFlowTree getChildTree(int i) { + i = 0 and result = getLeftOperand() + or + i = 1 and result = getRightOperand() + } + } + + private class LogicalBinaryExprTree extends BinaryExprTree, LogicalBinaryExpr { + boolean shortCircuit; + + LogicalBinaryExprTree() { + this instanceof LandExpr and shortCircuit = false + or + this instanceof LorExpr and shortCircuit = true + } + + private ControlFlow::Node getGuard(boolean outcome) { + result = MkConditionGuardNode(getLeftOperand(), outcome) + } + + override predicate lastNode(ControlFlow::Node last, Completion cmpl) { + lastNode(getAnOperand(), last, cmpl) and + not cmpl.isNormal() + or + if isCond(this) + then ( + last = getGuard(shortCircuit) and + cmpl = Bool(shortCircuit) + or + lastNode(getRightOperand(), last, cmpl) + ) else ( + last = MkExprNode(this) and + cmpl = Done() + ) + } + + override predicate succ(ControlFlow::Node pred, ControlFlow::Node succ) { + exists(Completion lcmpl | + lastNode(getLeftOperand(), pred, lcmpl) and + succ = getGuard(lcmpl.getOutcome()) + ) + or + pred = getGuard(shortCircuit.booleanNot()) and + firstNode(getRightOperand(), succ) + or + not isCond(this) and + ( + pred = getGuard(shortCircuit) and + succ = MkExprNode(this) + or + exists(Completion rcmpl | + lastNode(getRightOperand(), pred, rcmpl) and + rcmpl.isNormal() and + succ = MkExprNode(this) + ) + ) + } + } + + private class CallExprTree extends PostOrderTree, CallExpr { + private predicate isSpecial() { + this = any(DeferStmt 
defer).getCall() or + this = any(GoStmt go).getCall() + } + + override ControlFlow::Node getNode() { + not isSpecial() and + result = MkExprNode(this) + } + + override Completion getCompletion() { + (not exists(getTarget()) or getTarget().mayReturnNormally()) and + result = Done() + or + (not exists(getTarget()) or getTarget().mayPanic()) and + result = Panic() + } + + override ControlFlowTree getChildTree(int i) { + i = 0 and result = getCalleeExpr() + or + result = getArgument(i - 1) and + // calls to `make` and `new` can have type expressions as arguments + not result instanceof TypeExpr + } + + override predicate succ(ControlFlow::Node pred, ControlFlow::Node succ) { + // interpose implicit argument destructuring nodes between last argument + // and call itself; this is for cases like `f(g())` where `g` has multiple + // results + exists(ControlFlow::Node mid | PostOrderTree.super.succ(pred, mid) | + if mid = getNode() then succ = getEpilogueNode(0) else succ = mid + ) + or + exists(int i | + pred = getEpilogueNode(i) and + succ = getEpilogueNode(i + 1) + ) + } + + private ControlFlow::Node getEpilogueNode(int i) { + result = MkExtractNode(this, i) + or + i = max(int j | exists(MkExtractNode(this, j))) + 1 and + result = getNode() + or + not exists(MkExtractNode(this, _)) and + i = 0 and + result = getNode() + } + + override predicate lastNode(ControlFlow::Node last, Completion cmpl) { + PostOrderTree.super.lastNode(last, cmpl) + or + isSpecial() and + lastNode(getLastChildTree(), last, cmpl) + } + } + + private class CaseClauseTree extends ControlFlowTree, CaseClause { + private ControlFlow::Node getExprStart(int i) { + firstNode(getExpr(i), result) + or + getExpr(i) instanceof TypeExpr and + result = MkCaseCheckNode(this, i) + } + + ControlFlow::Node getExprEnd(int i, Boolean outcome) { + exists(Expr e | e = getExpr(i) | + result = MkConditionGuardNode(e, outcome) + or + not exists(MkConditionGuardNode(e, _)) and + result = MkCaseCheckNode(this, i) + ) + } + 
+ private ControlFlow::Node getBodyStart() { + firstNode(getStmt(0), result) or result = MkSkipNode(this) + } + + override predicate firstNode(ControlFlow::Node first) { + first = getExprStart(0) + or + not exists(getAnExpr()) and + first = getBodyStart() + } + + override predicate lastNode(ControlFlow::Node last, Completion cmpl) { + ControlFlowTree.super.lastNode(last, cmpl) + or + // TODO: shouldn't be here + last = getExprEnd(getNumExpr() - 1, false) and + cmpl = Bool(false) + or + last = MkSkipNode(this) and + cmpl = Done() + or + lastNode(getStmt(getNumStmt() - 1), last, cmpl) + } + + override predicate succ(ControlFlow::Node pred, ControlFlow::Node succ) { + ControlFlowTree.super.succ(pred, succ) + or + exists(int i | + lastNode(getExpr(i), pred, normalCompletion()) and + succ = MkCaseCheckNode(this, i) + or + // visit guard node if there is one + pred = MkCaseCheckNode(this, i) and + succ = getExprEnd(i, _) and + succ != pred // this avoids self-loops if there isn't a guard node + or + pred = getExprEnd(i, false) and + succ = getExprStart(i + 1) + or + isPassingEdge(i, pred, succ, _) + ) + } + + predicate isPassingEdge(int i, ControlFlow::Node pred, ControlFlow::Node succ, Expr testExpr) { + pred = getExprEnd(i, true) and + succ = getBodyStart() and + testExpr = getExpr(i) + } + + override ControlFlowTree getChildTree(int i) { result = getStmt(i) } + } + + private class CommClauseTree extends ControlFlowTree, CommClause { + override predicate firstNode(ControlFlow::Node first) { firstNode(getComm(), first) } + + override predicate lastNode(ControlFlow::Node last, Completion cmpl) { + ControlFlowTree.super.lastNode(last, cmpl) + or + last = MkSkipNode(this) and + cmpl = Done() + or + lastNode(getStmt(getNumStmt() - 1), last, cmpl) + } + + override ControlFlowTree getChildTree(int i) { result = getStmt(i) } + } + + private class CompositeLiteralTree extends ControlFlowTree, CompositeLit { + private ControlFlow::Node getElementInit(int i) { + result = 
MkLiteralElementInitNode(getElement(i)) + } + + private ControlFlow::Node getElementStart(int i) { + exists(Expr elt | elt = getElement(i) | + result = MkImplicitLiteralElementIndex(elt) + or + (elt instanceof KeyValueExpr or this instanceof StructLit) and + firstNode(getElement(i), result) + ) + } + + override predicate firstNode(ControlFlow::Node first) { first = MkExprNode(this) } + + override predicate lastNode(ControlFlow::Node last, Completion cmpl) { + ControlFlowTree.super.lastNode(last, cmpl) + or + last = getElementInit(getNumElement() - 1) and + cmpl = Done() + or + not exists(getElement(_)) and + last = MkExprNode(this) and + cmpl = Done() + } + + override predicate succ(ControlFlow::Node pred, ControlFlow::Node succ) { + firstNode(pred) and + succ = getElementStart(0) + or + exists(int i | + pred = MkImplicitLiteralElementIndex(getElement(i)) and + firstNode(getElement(i), succ) + or + lastNode(getElement(i), pred, normalCompletion()) and + succ = getElementInit(i) + or + pred = getElementInit(i) and + succ = getElementStart(i + 1) + ) + } + } + + private class ConversionExprTree extends PostOrderTree, ConversionExpr { + override Completion getCompletion() { + // conversions of a slice to an array pointer are the only kind that may panic + this.getType().(PointerType).getBaseType() instanceof ArrayType and + result = Panic() + or + result = Done() + } + + override ControlFlow::Node getNode() { result = MkExprNode(this) } + + override ControlFlowTree getChildTree(int i) { i = 0 and result = getOperand() } + } + + private class DeferStmtTree extends PostOrderTree, DeferStmt { + override ControlFlow::Node getNode() { result = MkDeferNode(this) } + + override ControlFlowTree getChildTree(int i) { i = 0 and result = getCall() } + } + + private class FuncDeclTree extends PostOrderTree, FuncDecl { + override ControlFlow::Node getNode() { result = MkFuncDeclNode(this) } + + override ControlFlowTree getChildTree(int i) { i = 0 and result = getNameExpr() } + + 
override predicate lastNode(ControlFlow::Node last, Completion cmpl) { + // override to prevent panic propagation out of function declarations + last = getNode() and cmpl = Done() + } + } + + private class GoStmtTree extends PostOrderTree, GoStmt { + override ControlFlow::Node getNode() { result = MkGoNode(this) } + + override ControlFlowTree getChildTree(int i) { i = 0 and result = getCall() } + } + + private class IfStmtTree extends ControlFlowTree, IfStmt { + private ControlFlow::Node getGuard(boolean outcome) { + result = MkConditionGuardNode(getCond(), outcome) + } + + override predicate firstNode(ControlFlow::Node first) { + firstNode(getInit(), first) + or + not exists(getInit()) and + firstNode(getCond(), first) + } + + override predicate lastNode(ControlFlow::Node last, Completion cmpl) { + ControlFlowTree.super.lastNode(last, cmpl) + or + lastNode(getThen(), last, cmpl) + or + lastNode(getElse(), last, cmpl) + or + not exists(getElse()) and + last = getGuard(false) and + cmpl = Done() + } + + override predicate succ(ControlFlow::Node pred, ControlFlow::Node succ) { + lastNode(getInit(), pred, normalCompletion()) and + firstNode(getCond(), succ) + or + exists(Completion condCmpl | + lastNode(getCond(), pred, condCmpl) and + succ = MkConditionGuardNode(getCond(), condCmpl.getOutcome()) + ) + or + pred = getGuard(true) and + firstNode(getThen(), succ) + or + pred = getGuard(false) and + firstNode(getElse(), succ) + } + } + + private class IndexExprTree extends ControlFlowTree, IndexExpr { + override predicate firstNode(ControlFlow::Node first) { firstNode(getBase(), first) } + + override predicate lastNode(ControlFlow::Node last, Completion cmpl) { + ControlFlowTree.super.lastNode(last, cmpl) + or + // panic due to `nil` dereference + last = MkImplicitDeref(this.getBase()) and + cmpl = Panic() + or + last = mkExprOrSkipNode(this) and + (cmpl = Done() or cmpl = Panic()) + } + + override predicate succ(ControlFlow::Node pred, ControlFlow::Node succ) { + 
lastNode(getBase(), pred, normalCompletion()) and + ( + succ = MkImplicitDeref(this.getBase()) + or + not exists(MkImplicitDeref(this.getBase())) and + firstNode(this.getIndex(), succ) + ) + or + pred = MkImplicitDeref(this.getBase()) and + firstNode(this.getIndex(), succ) + or + lastNode(getIndex(), pred, normalCompletion()) and + succ = mkExprOrSkipNode(this) + } + } + + private class LoopTree extends ControlFlowTree, LoopStmt { + BranchTarget getLabel() { result = BranchTarget::of(this) } + + override predicate lastNode(ControlFlow::Node last, Completion cmpl) { + exists(Completion inner | lastNode(getBody(), last, inner) and not inner.isNormal() | + if inner = Break(getLabel()) + then cmpl = Done() + else + if inner = Continue(getLabel()) + then none() + else cmpl = inner + ) + } + } + + private class FileTree extends ControlFlowTree, File { + FileTree() { exists(getADecl()) } + + override predicate lastNode(ControlFlow::Node last, Completion cmpl) { none() } + + override predicate succ(ControlFlow::Node pred, ControlFlow::Node succ) { + ControlFlowTree.super.succ(pred, succ) + or + pred = MkEntryNode(this) and + firstNode(this.getDecl(0), succ) + or + exists(int i, Completion inner | lastNode(this.getDecl(i), pred, inner) | + not inner.isNormal() + or + i = getNumDecl() - 1 + ) and + succ = MkExitNode(this) + } + + override ControlFlowTree getChildTree(int i) { result = getDecl(i) } + } + + private class ForTree extends LoopTree, ForStmt { + private ControlFlow::Node getGuard(boolean outcome) { + result = MkConditionGuardNode(getCond(), outcome) + } + + override predicate firstNode(ControlFlow::Node first) { firstNode(getFirstChildTree(), first) } + + override predicate lastNode(ControlFlow::Node last, Completion cmpl) { + LoopTree.super.lastNode(last, cmpl) + or + lastNode(getInit(), last, cmpl) and + not cmpl.isNormal() + or + lastNode(getCond(), last, cmpl) and + not cmpl.isNormal() + or + lastNode(getPost(), last, cmpl) and + not cmpl.isNormal() + or + 
last = getGuard(false) and + cmpl = Done() + } + + override ControlFlowTree getChildTree(int i) { + i = 0 and result = getInit() + or + i = 1 and result = getCond() + or + i = 2 and result = getBody() + or + i = 3 and result = getPost() + or + i = 4 and result = getCond() + or + i = 5 and result = getBody() + } + + override predicate succ(ControlFlow::Node pred, ControlFlow::Node succ) { + exists(int i, ControlFlowTree predTree, Completion cmpl | + predTree = getChildTreeRanked(i) and + lastNode(predTree, pred, cmpl) and + cmpl.isNormal() + | + if predTree = getCond() + then succ = getGuard(cmpl.getOutcome()) + else firstNode(getChildTreeRanked(i + 1), succ) + ) + or + pred = getGuard(true) and + firstNode(getBody(), succ) + } + } + + private class FuncDefTree extends ControlFlowTree, FuncDef { + FuncDefTree() { exists(getBody()) } + + pragma[noinline] + private MkEntryNode getEntry() { result = MkEntryNode(this) } + + private Parameter getParameterRanked(int i) { + result = rank[i + 1](Parameter p, int j | p = getParameter(j) | p order by j) + } + + private ControlFlow::Node getPrologueNode(int i) { + i = -1 and result = getEntry() + or + exists(int numParm, int numRes | + numParm = count(getParameter(_)) and + numRes = count(getResultVar(_)) + | + exists(int j, Parameter p | p = getParameterRanked(j) | + i = 2 * j and result = MkArgumentNode(p) + or + i = 2 * j + 1 and result = MkParameterInit(p) + ) + or + exists(int j, ResultVariable v | v = getResultVar(j) | + i = 2 * numParm + 2 * j and + result = MkZeroInitNode(v) + or + i = 2 * numParm + 2 * j + 1 and + result = MkResultInit(v) + ) + or + i = 2 * numParm + 2 * numRes and + firstNode(getBody(), result) + ) + } + + private ControlFlow::Node getEpilogueNode(int i) { + result = MkResultReadNode(getResultVar(i)) + or + i = count(getAResultVar()) and + result = MkExitNode(this) + } + + pragma[noinline] + private predicate firstDefer(ControlFlow::Node nd) { + exists(DeferStmt defer | + nd = 
MkExprNode(defer.getCall()) and + // `defer` can be the first `defer` statement executed + // there is always a predecessor node because the `defer`'s call is always + // evaluated before the defer statement itself + MkDeferNode(defer) = succ(notDeferSucc*(getEntry())) + ) + } + + override predicate lastNode(ControlFlow::Node last, Completion cmpl) { none() } + + override predicate succ(ControlFlow::Node pred, ControlFlow::Node succ) { + exists(int i | + pred = getPrologueNode(i) and + succ = getPrologueNode(i + 1) + ) + or + exists(GotoStmt goto, LabeledStmt ls | + pred = MkSkipNode(goto) and + this = goto.getEnclosingFunction() and + this = ls.getEnclosingFunction() and + goto.getLabel() = ls.getLabel() and + firstNode(ls, succ) + ) + or + exists(Completion cmpl | + lastNode(this.getBody(), pred, cmpl) and + // last node of function body can be reached without going through a `defer` statement + pred = notDeferSucc*(getEntry()) + | + // panic goes directly to exit, non-panic reads result variables first + if cmpl = Panic() then succ = MkExitNode(this) else succ = getEpilogueNode(0) + ) + or + lastNode(this.getBody(), pred, _) and + exists(DeferStmt defer | defer = this.getADeferStmt() | + succ = MkExprNode(defer.getCall()) and + // the last `DeferStmt` executed before pred is this `defer` + pred = notDeferSucc*(MkDeferNode(defer)) + ) + or + exists(DeferStmt predDefer, DeferStmt succDefer | + predDefer = this.getADeferStmt() and + succDefer = this.getADeferStmt() + | + // reversed because `defer`s are executed in LIFO order + MkDeferNode(predDefer) = nextDefer(MkDeferNode(succDefer)) and + pred = MkExprNode(predDefer.getCall()) and + succ = MkExprNode(succDefer.getCall()) + ) + or + firstDefer(pred) and + ( + // conservatively assume that we might either panic (and hence skip the result reads) + // or not + succ = MkExitNode(this) + or + succ = getEpilogueNode(0) + ) + or + exists(int i | + pred = getEpilogueNode(i) and + succ = getEpilogueNode(i + 1) + ) + } + } 
+ + private class GotoTree extends ControlFlowTree, GotoStmt { + override predicate firstNode(ControlFlow::Node first) { first = MkSkipNode(this) } + } + + private class IncDecTree extends ControlFlowTree, IncDecStmt { + override predicate firstNode(ControlFlow::Node first) { firstNode(getOperand(), first) } + + override predicate lastNode(ControlFlow::Node last, Completion cmpl) { + ControlFlowTree.super.lastNode(last, cmpl) + or + last = MkIncDecNode(this) and + cmpl = Done() + } + + override predicate succ(ControlFlow::Node pred, ControlFlow::Node succ) { + lastNode(getOperand(), pred, normalCompletion()) and + succ = MkImplicitOne(this) + or + pred = MkImplicitOne(this) and + succ = MkIncDecRhs(this) + or + pred = MkIncDecRhs(this) and + succ = MkIncDecNode(this) + } + } + + private class RangeTree extends LoopTree, RangeStmt { + override predicate firstNode(ControlFlow::Node first) { firstNode(getDomain(), first) } + + override predicate lastNode(ControlFlow::Node last, Completion cmpl) { + LoopTree.super.lastNode(last, cmpl) + or + last = MkNextNode(this) and + cmpl = Done() + or + lastNode(getKey(), last, cmpl) and + not cmpl.isNormal() + or + lastNode(getValue(), last, cmpl) and + not cmpl.isNormal() + or + lastNode(getDomain(), last, cmpl) and + not cmpl.isNormal() + } + + override predicate succ(ControlFlow::Node pred, ControlFlow::Node succ) { + lastNode(getDomain(), pred, normalCompletion()) and + succ = MkNextNode(this) + or + pred = MkNextNode(this) and + ( + firstNode(getKey(), succ) + or + not exists(getKey()) and + firstNode(getBody(), succ) + ) + or + lastNode(getKey(), pred, normalCompletion()) and + ( + firstNode(getValue(), succ) + or + not exists(getValue()) and + succ = MkExtractNode(this, 0) + ) + or + lastNode(getValue(), pred, normalCompletion()) and + succ = MkExtractNode(this, 0) + or + pred = MkExtractNode(this, 0) and + ( + if exists(getValue()) + then succ = MkExtractNode(this, 1) + else + if exists(MkAssignNode(this, 0)) + then succ 
= MkAssignNode(this, 0) + else + if exists(MkAssignNode(this, 1)) + then succ = MkAssignNode(this, 1) + else firstNode(getBody(), succ) + ) + or + pred = MkExtractNode(this, 1) and + ( + if exists(MkAssignNode(this, 0)) + then succ = MkAssignNode(this, 0) + else + if exists(MkAssignNode(this, 1)) + then succ = MkAssignNode(this, 1) + else firstNode(getBody(), succ) + ) + or + pred = MkAssignNode(this, 0) and + ( + if exists(MkAssignNode(this, 1)) + then succ = MkAssignNode(this, 1) + else firstNode(getBody(), succ) + ) + or + pred = MkAssignNode(this, 1) and + firstNode(getBody(), succ) + or + exists(Completion inner | + lastNode(getBody(), pred, inner) and + (inner.isNormal() or inner = Continue(BranchTarget::of(this))) and + succ = MkNextNode(this) + ) + } + } + + private class RecvStmtTree extends ControlFlowTree, RecvStmt { + override predicate firstNode(ControlFlow::Node first) { + firstNode(getExpr().getOperand(), first) + } + } + + private class ReturnStmtTree extends PostOrderTree, ReturnStmt { + override ControlFlow::Node getNode() { result = MkReturnNode(this) } + + override Completion getCompletion() { result = Return() } + + override predicate succ(ControlFlow::Node pred, ControlFlow::Node succ) { + exists(int i | + lastNode(getExpr(i), pred, normalCompletion()) and + succ = complete(i) + or + pred = MkExtractNode(this, i) and + succ = after(i) + or + pred = MkResultWriteNode(_, i, this) and + succ = next(i) + ) + } + + private ControlFlow::Node complete(int i) { + result = MkExtractNode(this, i) + or + not exists(MkExtractNode(this, _)) and + result = after(i) + } + + private ControlFlow::Node after(int i) { + result = MkResultWriteNode(_, i, this) + or + not exists(MkResultWriteNode(_, i, this)) and + result = next(i) + } + + private ControlFlow::Node next(int i) { + firstNode(getExpr(i + 1), result) + or + exists(MkExtractNode(this, _)) and + result = complete(i + 1) + or + i + 1 = getEnclosingFunction().getType().getNumResult() and + result = 
getNode() + } + + override ControlFlowTree getChildTree(int i) { result = getExpr(i) } + } + + private class SelectStmtTree extends ControlFlowTree, SelectStmt { + private BranchTarget getLabel() { result = BranchTarget::of(this) } + + override predicate firstNode(ControlFlow::Node first) { + firstNode(getNonDefaultCommClause(0), first) + or + getNumNonDefaultCommClause() = 0 and + first = MkSelectNode(this) + } + + override predicate lastNode(ControlFlow::Node last, Completion cmpl) { + exists(Completion inner | lastNode(getACommClause(), last, inner) | + if inner = Break(getLabel()) then cmpl = Done() else cmpl = inner + ) + } + + override predicate succ(ControlFlow::Node pred, ControlFlow::Node succ) { + ControlFlowTree.super.succ(pred, succ) + or + exists(CommClause cc, int i, Stmt comm | + cc = getNonDefaultCommClause(i) and + comm = cc.getComm() and + ( + comm instanceof RecvStmt and + lastNode(comm.(RecvStmt).getExpr().getOperand(), pred, normalCompletion()) + or + comm instanceof SendStmt and + lastNode(comm.(SendStmt).getValue(), pred, normalCompletion()) + ) + | + firstNode(getNonDefaultCommClause(i + 1), succ) + or + i = getNumNonDefaultCommClause() - 1 and + succ = MkSelectNode(this) + ) + or + pred = MkSelectNode(this) and + exists(CommClause cc, Stmt comm | cc = getNonDefaultCommClause(_) and comm = cc.getComm() | + comm instanceof RecvStmt and + succ = MkExprNode(comm.(RecvStmt).getExpr()) + or + comm instanceof SendStmt and + succ = MkSendNode(comm) + ) + or + pred = MkSelectNode(this) and + exists(CommClause cc | cc = getDefaultCommClause() | + firstNode(cc.getStmt(0), succ) + or + succ = MkSkipNode(cc) + ) + or + exists(CommClause cc, RecvStmt recv | cc = getCommClause(_) and recv = cc.getComm() | + pred = MkExprNode(recv.getExpr()) and + ( + firstNode(recv.getLhs(0), succ) + or + not exists(recv.getLhs(0)) and + (firstNode(cc.getStmt(0), succ) or succ = MkSkipNode(cc)) + ) + or + lastNode(recv.getLhs(0), pred, normalCompletion()) and + not 
exists(recv.getLhs(1)) and + ( + succ = MkAssignNode(recv, 0) + or + not exists(MkAssignNode(recv, 0)) and + (firstNode(cc.getStmt(0), succ) or succ = MkSkipNode(cc)) + ) + or + lastNode(recv.getLhs(1), pred, normalCompletion()) and + succ = MkExtractNode(recv, 0) + or + ( + pred = MkAssignNode(recv, 0) and + not exists(MkExtractNode(recv, 1)) + or + pred = MkExtractNode(recv, 1) and + not exists(MkAssignNode(recv, 1)) + or + pred = MkAssignNode(recv, 1) + ) and + (firstNode(cc.getStmt(0), succ) or succ = MkSkipNode(cc)) + ) + or + exists(CommClause cc, SendStmt ss | + cc = getCommClause(_) and + ss = cc.getComm() and + pred = MkSendNode(ss) + | + firstNode(cc.getStmt(0), succ) + or + succ = MkSkipNode(cc) + ) + } + } + + private class SelectorExprTree extends ControlFlowTree, SelectorExpr { + SelectorExprTree() { this.getBase() instanceof ValueExpr } + + override predicate firstNode(ControlFlow::Node first) { firstNode(this.getBase(), first) } + + override predicate lastNode(ControlFlow::Node last, Completion cmpl) { + ControlFlowTree.super.lastNode(last, cmpl) + or + // panic due to `nil` dereference + last = MkImplicitDeref(this.getBase()) and + cmpl = Panic() + or + last = mkExprOrSkipNode(this) and + cmpl = Done() + } + + override predicate succ(ControlFlow::Node pred, ControlFlow::Node succ) { + exists(int i | pred = this.getStepWithRank(i) and succ = this.getStepWithRank(i + 1)) + } + + private ControlFlow::Node getStepOrdered(int i) { + i = -2 and lastNode(this.getBase(), result, normalCompletion()) + or + i = -1 and result = MkImplicitDeref(this.getBase()) + or + exists(int maxIndex | + maxIndex = max(int k | k = 0 or exists(MkImplicitFieldSelection(this, k, _))) + | + result = MkImplicitFieldSelection(this, maxIndex - i, _) + or + i = maxIndex and + result = mkExprOrSkipNode(this) + ) + } + + private ControlFlow::Node getStepWithRank(int i) { + exists(int j | + result = this.getStepOrdered(j) and + j = rank[i + 1](int k | exists(this.getStepOrdered(k))) + 
) + } + } + + private class SendStmtTree extends ControlFlowTree, SendStmt { + override predicate firstNode(ControlFlow::Node first) { firstNode(getChannel(), first) } + + override predicate lastNode(ControlFlow::Node last, Completion cmpl) { + ControlFlowTree.super.lastNode(last, cmpl) + or + last = MkSendNode(this) and + (cmpl = Done() or cmpl = Panic()) + } + + override predicate succ(ControlFlow::Node pred, ControlFlow::Node succ) { + ControlFlowTree.super.succ(pred, succ) + or + not this = any(CommClause cc).getComm() and + lastNode(getValue(), pred, normalCompletion()) and + succ = MkSendNode(this) + } + + override ControlFlowTree getChildTree(int i) { + i = 0 and result = getChannel() + or + i = 1 and result = getValue() + } + } + + private class SliceExprTree extends ControlFlowTree, SliceExpr { + override predicate firstNode(ControlFlow::Node first) { firstNode(getBase(), first) } + + override predicate lastNode(ControlFlow::Node last, Completion cmpl) { + ControlFlowTree.super.lastNode(last, cmpl) + or + // panic due to `nil` dereference + last = MkImplicitDeref(getBase()) and + cmpl = Panic() + or + last = MkExprNode(this) and + (cmpl = Done() or cmpl = Panic()) + } + + override predicate succ(ControlFlow::Node pred, ControlFlow::Node succ) { + ControlFlowTree.super.succ(pred, succ) + or + lastNode(getBase(), pred, normalCompletion()) and + ( + succ = MkImplicitDeref(getBase()) + or + not exists(MkImplicitDeref(getBase())) and + (firstNode(getLow(), succ) or succ = MkImplicitLowerSliceBound(this)) + ) + or + pred = MkImplicitDeref(getBase()) and + (firstNode(getLow(), succ) or succ = MkImplicitLowerSliceBound(this)) + or + (lastNode(getLow(), pred, normalCompletion()) or pred = MkImplicitLowerSliceBound(this)) and + (firstNode(getHigh(), succ) or succ = MkImplicitUpperSliceBound(this)) + or + (lastNode(getHigh(), pred, normalCompletion()) or pred = MkImplicitUpperSliceBound(this)) and + (firstNode(getMax(), succ) or succ = MkImplicitMaxSliceBound(this)) 
/**
 * Control-flow tree for a star expression.
 *
 * The base expression is the only child; the node for the star expression itself is either
 * its expression node or its skip node, and it may complete or panic.
 */
private class StarExprTree extends PostOrderTree, StarExpr {
  override ControlFlow::Node getNode() {
    result = MkExprNode(this)
    or
    result = MkSkipNode(this)
  }

  override Completion getCompletion() {
    result = Panic()
    or
    result = Done()
  }

  override ControlFlowTree getChildTree(int i) { result = this.getBase() and i = 0 }
}
/**
 * Control-flow tree for a type assertion, evaluated in post-order after its operand.
 */
private class TypeAssertTree extends PostOrderTree, TypeAssertExpr {
  override ControlFlow::Node getNode() { result = MkExprNode(this) }

  override Completion getCompletion() {
    // A type mismatch panics, except where the assertion is the right-hand side of a
    // two-variable assignment or initialization, or the expression of a type switch.
    result = Panic() and
    not (
      exists(Assignment asgn | this = asgn.getRhs().stripParens() and asgn.getNumLhs() = 2)
      or
      exists(ValueSpec spec | this = spec.getInit().stripParens() and spec.getNumName() = 2)
      or
      exists(TypeSwitchStmt sw | this = sw.getExpr())
    )
    or
    result = Done()
  }

  override ControlFlowTree getChildTree(int i) { result = this.getExpr() and i = 0 }
}
/**
 * Holds if `root` has some last node whose completion is not a panic, that is, if `root`
 * may return without panicking, exiting the process, or looping forever.
 *
 * This is defined conservatively, and so may also hold of a function that in fact
 * cannot return normally, but never fails to hold of a function that can return normally.
 */
cached
predicate mayReturnNormally(ControlFlowTree root) {
  exists(Completion cmpl | lastNode(root, _, cmpl) | cmpl != Panic())
}
/**
 * Holds if `pred` is the node for the case `testExpr` in an expression
 * switch statement which is switching on `switchExpr`, and `succ` is the
 * node to be executed next if the case test succeeds.
 */
cached
predicate isSwitchCaseTestPassingEdge(
  ControlFlow::Node pred, ControlFlow::Node succ, Expr switchExpr, Expr testExpr
) {
  exists(ExpressionSwitchStmt sw, CaseClauseTree clause |
    switchExpr = sw.getExpr() and
    clause = sw.getACase()
  |
    clause.isPassingEdge(_, pred, succ, testExpr)
  )
}
/**
 * An IR instruction.
 *
 * The characteristic predicate enumerates the kinds of control-flow node that count as
 * instructions. Most member predicates below have an empty (`none()`) default and are
 * meant to be overridden by subclasses such as `EvalInstruction`.
 */
class Instruction extends ControlFlow::Node {
  Instruction() {
    this instanceof MkExprNode or
    this instanceof MkLiteralElementInitNode or
    this instanceof MkImplicitLiteralElementIndex or
    this instanceof MkAssignNode or
    this instanceof MkCompoundAssignRhsNode or
    this instanceof MkExtractNode or
    this instanceof MkZeroInitNode or
    this instanceof MkFuncDeclNode or
    this instanceof MkDeferNode or
    this instanceof MkGoNode or
    this instanceof MkConditionGuardNode or
    this instanceof MkIncDecNode or
    this instanceof MkIncDecRhs or
    this instanceof MkImplicitOne or
    this instanceof MkReturnNode or
    this instanceof MkResultWriteNode or
    this instanceof MkResultReadNode or
    this instanceof MkSelectNode or
    this instanceof MkSendNode or
    this instanceof MkParameterInit or
    this instanceof MkArgumentNode or
    this instanceof MkResultInit or
    this instanceof MkNextNode or
    this instanceof MkImplicitTrue or
    this instanceof MkCaseCheckNode or
    this instanceof MkImplicitLowerSliceBound or
    this instanceof MkImplicitUpperSliceBound or
    this instanceof MkImplicitMaxSliceBound or
    this instanceof MkImplicitDeref or
    this instanceof MkImplicitFieldSelection
  }

  /**
   * Holds if this instruction reads the value of variable or constant `v`.
   *
   * By default this is derived from `readsField` and `readsMethod`.
   */
  predicate reads(ValueEntity v) { this.readsField(_, v) or this.readsMethod(_, v) }

  /**
   * Holds if this instruction updates variable or constant `v` to the value of `rhs`.
   *
   * By default this is derived from `writesField`.
   */
  predicate writes(ValueEntity v, Instruction rhs) { this.writesField(_, v, rhs) }

  /** Holds if this instruction reads the value of field `f` on the value of `base`. */
  predicate readsField(Instruction base, Field f) { none() }

  /**
   * Holds if this instruction updates the value of field `f` on the value of `base` to the
   * value of `rhs`.
   */
  predicate writesField(Instruction base, Field f, Instruction rhs) { none() }

  /** Holds if this instruction looks up method `m` on the value of `receiver`. */
  predicate readsMethod(Instruction receiver, Method m) { none() }

  /** Holds if this instruction reads the value of element `index` on the value of `base`. */
  predicate readsElement(Instruction base, Instruction index) { none() }

  /** Holds if this instruction updates the value of element `index` on the value of `base`. */
  predicate writesElement(Instruction base, Instruction index) { none() }

  /** Gets the type of the result of this instruction, if any. */
  Type getResultType() { none() }

  /** Gets the float value of the result of this instruction, if it can be determined. */
  float getFloatValue() { none() }

  /** Gets the int value of the result of this instruction, if it can be determined. */
  int getIntValue() { none() }

  /**
   * Holds if the complex value of the result of this instruction has real part `real` and
   * imaginary part `imag`.
   */
  predicate hasComplexValue(float real, float imag) { none() }

  /**
   * Gets the numeric value of the result of this instruction as a float, taken from either
   * `getFloatValue` or `getIntValue`.
   */
  float getNumericValue() { result = this.getFloatValue() or result = this.getIntValue() }

  /**
   * Gets the string representation of the exact value of the result of this instruction,
   * if any.
   *
   * For example, for the constant 3.141592653589793238462, this will
   * result in 1570796326794896619231/500000000000000000000
   */
  string getExactValue() { none() }

  /** Gets the string value of the result of this instruction, if it can be determined. */
  string getStringValue() { none() }

  /** Gets the Boolean value of the result of this instruction, if it can be determined. */
  boolean getBoolValue() { none() }

  /** Holds if the result of this instruction is known at compile time. */
  predicate isConst() { none() }

  /**
   * Holds if the result of this instruction is known at compile time, and is guaranteed not to
   * depend on the platform where it is evaluated.
   */
  predicate isPlatformIndependentConstant() { none() }

  /**
   * Gets a textual representation of the kind of this instruction.
   *
   * There is one case per node kind accepted by the characteristic predicate.
   */
  string getInsnKind() {
    this instanceof MkExprNode and result = "expression"
    or
    this instanceof MkLiteralElementInitNode and result = "element init"
    or
    this instanceof MkImplicitLiteralElementIndex and result = "element index"
    or
    this instanceof MkAssignNode and result = "assignment"
    or
    this instanceof MkCompoundAssignRhsNode and result = "right-hand side of compound assignment"
    or
    this instanceof MkExtractNode and result = "tuple element extraction"
    or
    this instanceof MkZeroInitNode and result = "zero value"
    or
    this instanceof MkFuncDeclNode and result = "function declaration"
    or
    this instanceof MkDeferNode and result = "defer"
    or
    this instanceof MkGoNode and result = "go"
    or
    this instanceof MkConditionGuardNode and result = "condition guard"
    or
    this instanceof MkIncDecNode and result = "increment/decrement"
    or
    this instanceof MkIncDecRhs and result = "right-hand side of increment/decrement"
    or
    this instanceof MkImplicitOne and result = "implicit 1"
    or
    this instanceof MkReturnNode and result = "return"
    or
    this instanceof MkResultWriteNode and result = "result write"
    or
    this instanceof MkResultReadNode and result = "result read"
    or
    this instanceof MkSelectNode and result = "select"
    or
    this instanceof MkSendNode and result = "send"
    or
    this instanceof MkParameterInit and result = "parameter initialization"
    or
    this instanceof MkArgumentNode and result = "argument"
    or
    this instanceof MkResultInit and result = "result initialization"
    or
    this instanceof MkNextNode and result = "next key-value pair"
    or
    this instanceof MkImplicitTrue and result = "implicit true"
    or
    this instanceof MkCaseCheckNode and result = "case"
    or
    this instanceof MkImplicitLowerSliceBound and result = "implicit lower bound"
    or
    this instanceof MkImplicitUpperSliceBound and result = "implicit upper bound"
    or
    this instanceof MkImplicitMaxSliceBound and result = "implicit maximum"
    or
    this instanceof MkImplicitDeref and result = "implicit dereference"
    or
    this instanceof MkImplicitFieldSelection and result = "implicit field selection"
  }
}
/**
 * Gets the effective base of a selector, index or slice expression, taking implicit dereferences
 * and implicit field reads into account.
 *
 * For a selector expression `b.f`, this could be the implicit dereference `*b`, or the implicit
 * field access `b.Embedded` if the field `f` is promoted from an embedded type `Embedded`, or a
 * combination of both `*(b.Embedded)`, or simply `b` if neither case applies.
 */
private Instruction selectorBase(Expr e) {
  // an implicit field read with index 1 on `e` takes precedence over everything else
  result.(ImplicitFieldReadInstruction).getSelectorExpr() = e and
  result.(ImplicitFieldReadInstruction).getIndex() = 1
  or
  not exists(ImplicitFieldReadInstruction implicitRead |
    implicitRead.getIndex() = 1 and implicitRead.getSelectorExpr() = e
  ) and
  exists(Expr operand |
    operand = e.(SliceExpr).getBase()
    or
    operand = e.(IndexExpr).getBase()
    or
    operand = e.(SelectorExpr).getBase()
  |
    // prefer the implicit dereference of the syntactic base where one exists
    if exists(MkImplicitDeref(operand))
    then result = MkImplicitDeref(operand)
    else result = evalExprInstruction(operand)
  )
}
/**
 * An IR instruction that reads the value of a field.
 *
 * On snapshots with incomplete type information, method expressions may sometimes be
 * misclassified as field reads.
 */
class FieldReadInstruction extends ComponentReadInstruction {
  SelectorExpr e;
  int index;
  Field field;

  FieldReadInstruction() {
    // an implicit field selection carries its own selector expression, index and field
    this = MkImplicitFieldSelection(e, index, field)
    or
    // an explicit selector expression referring to a field is the read with index 0
    index = 0 and
    e = this.(EvalInstruction).getExpr() and
    e.getSelector() = field.getAReference()
  }

  /** Gets the `SelectorExpr` of this field read. */
  SelectorExpr getSelectorExpr() { result = e }

  /** Gets the index of this field read. */
  int getIndex() { result = index }

  /** Gets the field being read. */
  Field getField() { result = field }

  /**
   * Gets the instruction computing the value this field is read from.
   *
   * This is the implicit field read on the same selector expression whose index is one
   * greater than this one's, if there is one; otherwise it is the implicit dereference of
   * the selector's base if that exists, and failing that the evaluation of the base.
   */
  Instruction getBaseInstruction() {
    exists(ImplicitFieldReadInstruction inner |
      inner.getIndex() = pragma[only_bind_into](index + 1) and inner.getSelectorExpr() = e
    |
      result = inner
    )
    or
    not exists(ImplicitFieldReadInstruction inner |
      inner.getIndex() = pragma[only_bind_into](index + 1) and inner.getSelectorExpr() = e
    ) and
    (
      if exists(MkImplicitDeref(e.getBase()))
      then result = MkImplicitDeref(e.getBase())
      else result = evalExprInstruction(e.getBase())
    )
  }

  override predicate readsField(Instruction base, Field f) {
    f = field and base = this.getBaseInstruction()
  }
}
/**
 * An IR instruction for an implicit field read as part of reading a
 * promoted field.
 *
 * If the field that is being implicitly read has a pointer type then this
 * instruction represents an implicit dereference of it.
 */
class ImplicitFieldReadInstruction extends FieldReadInstruction, MkImplicitFieldSelection {
  ImplicitFieldReadInstruction() { this = MkImplicitFieldSelection(e, index, field) }

  override predicate reads(ValueEntity v) { field = v }

  override Type getResultType() {
    // pointer-typed field: the result is the pointed-to type
    result = field.getType().(PointerType).getBaseType()
    or
    // any other field: the result is the field's own type
    not field.getType() instanceof PointerType and
    result = field.getType()
  }

  override ControlFlow::Root getRoot() { result.isRootOf(e) }

  override string toString() { result = "implicit read of field " + field.toString() }

  /** Reports the location of the base of the underlying selector expression. */
  override predicate hasLocationInfo(
    string path, int startLine, int startCol, int endLine, int endCol
  ) {
    e.getBase().hasLocationInfo(path, startLine, startCol, endLine, endCol)
  }
}
/**
 * An IR instruction that writes a memory location.
 *
 * Such an instruction has a write target that is a left-hand side, a literal element
 * target, or a result write target.
 */
class WriteInstruction extends Instruction {
  WriteTarget lhs;

  WriteInstruction() {
    lhs = MkResultWriteTarget(this)
    or
    lhs = MkLiteralElementTarget(this)
    or
    lhs = MkLhs(this, _)
  }

  /** Gets the target to which this instruction writes. */
  WriteTarget getLhs() { result = lhs }

  /**
   * Gets the instruction computing the value this instruction writes.
   *
   * There is no result by default; subclasses provide one.
   */
  Instruction getRhs() { none() }

  override predicate writes(ValueEntity v, Instruction rhs) {
    rhs = this.getRhs() and
    this.getLhs().refersTo(v)
  }
}
/**
 * An IR instruction that initializes a field of a struct literal.
 */
class InitLiteralStructFieldInstruction extends InitLiteralComponentInstruction {
  override StructLit lit;

  /**
   * Gets the name of the initialized field.
   *
   * For a key-value element this is the name of the key identifier; otherwise it is the
   * name of the struct type's own field at the element's position.
   */
  string getFieldName() {
    result = elt.(KeyValueExpr).getKey().(Ident).getName()
    or
    not elt instanceof KeyValueExpr and
    lit.getStructType().hasOwnField(i, result, _, _)
  }

  /** Gets the initialized field. */
  Field getField() {
    result.getName() = this.getFieldName() and
    result.getDeclaringType() = lit.getStructType()
  }
}
/**
 * An IR instruction that writes to an element of an array, slice, or map.
 */
class ElementWriteInstruction extends WriteInstruction {
  override ElementTarget lhs;

  /** Gets the instruction computing the base value on which the element is written. */
  Instruction getBase() { result = lhs.getBase() }

  /** Gets the instruction computing the element index being written. */
  Instruction getIndex() { result = lhs.getIndex() }

  override predicate writesElement(Instruction base, Instruction index) {
    index = this.getIndex() and
    base = this.getBase()
  }
}
/**
 * An IR instruction computing the implicit index of an element in an array or slice literal.
 */
class ImplicitLiteralElementIndexInstruction extends Instruction, MkImplicitLiteralElementIndex {
  Expr elt;

  ImplicitLiteralElementIndexInstruction() { this = MkImplicitLiteralElementIndex(elt) }

  override Type getResultType() { result instanceof IntType }

  override ControlFlow::Root getRoot() { result.isRootOf(elt) }

  /** Gets the index of the underlying element within its enclosing composite literal. */
  override int getIntValue() {
    exists(CompositeLit enclosing, int pos |
      enclosing.getElement(pos) = elt and
      result = getElementIndex(enclosing, pos)
    )
  }

  override string getStringValue() { none() }

  override string getExactValue() { result = this.getIntValue().toString() }

  override predicate isConst() { any() }

  override predicate isPlatformIndependentConstant() { any() }

  override string toString() { result = "element index" }

  /** Reports an empty location at the start of the underlying element. */
  override predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    endcolumn = startcolumn and
    endline = startline and
    elt.hasLocationInfo(filepath, startline, startcolumn, _, _)
  }
}
/**
 * An instruction assigning to a variable or field.
 *
 * The instruction is identified by the assigning AST node together with the index of the
 * assigned position.
 */
class AssignInstruction extends WriteInstruction, MkAssignNode {
  AstNode assgn;
  int i;

  AssignInstruction() { this = MkAssignNode(assgn, i) }

  override Instruction getRhs() {
    // tuple element extraction for position `i`
    result = MkExtractNode(assgn, i)
    or
    // right-hand side of a compound assignment
    result = MkCompoundAssignRhsNode(assgn)
    or
    exists(ValueSpec decl | decl = assgn |
      // zero value of the entity declared at position `i`
      result = MkZeroInitNode(any(ValueEntity v | decl.getNameExpr(i) = v.getDeclaration()))
      or
      // one initializer per declared name
      decl.getNumInit() = decl.getNumName() and
      result = evalExprInstruction(decl.getInit(i))
    )
    or
    // simple assignment with one right-hand side per left-hand side
    exists(SimpleAssignStmt stmt | stmt = assgn and stmt.getNumRhs() = stmt.getNumLhs() |
      result = evalExprInstruction(stmt.getRhs(i))
    )
  }

  override ControlFlow::Root getRoot() { result.isRootOf(assgn) }

  override string toString() { result = "assignment to " + this.getLhs() }

  /** Reports the location of the write target. */
  override predicate hasLocationInfo(
    string path, int startLine, int startCol, int endLine, int endCol
  ) {
    this.getLhs().hasLocationInfo(path, startLine, startCol, endLine, endCol)
  }
}
/**
 * An instruction selecting one of multiple values returned by a function, or either the key
 * or the value of the iterator in a range loop, or the result or success value from a type
 * assertion.
 */
class ExtractTupleElementInstruction extends Instruction, MkExtractNode {
  AstNode s;
  int i;

  ExtractTupleElementInstruction() { this = MkExtractNode(s, i) }

  /** Gets the instruction computing the tuple value from which one value is extracted. */
  Instruction getBase() {
    exists(Expr baseExpr |
      baseExpr = s.(Assignment).getRhs() or
      baseExpr = s.(ValueSpec).getInit()
    |
      result = evalExprInstruction(baseExpr)
    )
    or
    result = MkNextNode(s)
    or
    result = evalExprInstruction(s.(ReturnStmt).getExpr())
    or
    result = evalExprInstruction(s.(CallExpr).getArgument(0).stripParens())
  }

  /** Holds if this extracts the `idx`th value of the result of `base`. */
  predicate extractsElement(Instruction base, int idx) { base = this.getBase() and idx = i }

  /**
   * Gets the type of the extracted component.
   *
   * For call results and tuple-typed expressions this is the `i`th result or component
   * type. For range statements it depends on the underlying type of the range domain:
   * component 0 is the index or key, and component 1 is the element or value.
   */
  override Type getResultType() {
    exists(CallExpr c | this.getBase() = evalExprInstruction(c) |
      result = c.getTarget().getResultType(i)
    )
    or
    exists(Expr e | this.getBase() = evalExprInstruction(e) |
      result = e.getType().(TupleType).getComponentType(pragma[only_bind_into](i))
    )
    or
    exists(Type rangeType | rangeType = s.(RangeStmt).getDomain().getType().getUnderlyingType() |
      // arrays, pointers to arrays, and slices: (int index, element)
      exists(Type baseType |
        baseType = rangeType.(ArrayType).getElementType() or
        baseType =
          rangeType.(PointerType).getBaseType().getUnderlyingType().(ArrayType).getElementType() or
        baseType = rangeType.(SliceType).getElementType()
      |
        i = 0 and
        result instanceof IntType
        or
        i = 1 and
        result = baseType
      )
      or
      // strings: (int index, rune)
      rangeType instanceof StringType and
      (
        i = 0 and
        result instanceof IntType
        or
        // restrict the rune type to component 1, so that the index component does not
        // also get `rune` as a result type
        i = 1 and
        result = Builtin::rune().getType()
      )
      or
      // maps: (key, value)
      exists(MapType map | map = rangeType |
        i = 0 and
        result = map.getKeyType()
        or
        i = 1 and
        result = map.getValueType()
      )
      or
      // channels that can be received from: only component 0, the element
      i = 0 and
      result = rangeType.(RecvChanType).getElementType()
      or
      i = 0 and
      result = rangeType.(SendRecvChanType).getElementType()
    )
  }

  override ControlFlow::Root getRoot() { result.isRootOf(s) }

  override string toString() { result = s + "[" + i + "]" }

  override predicate hasLocationInfo(
    string filepath, int startline, int startcolumn, int endline, int endcolumn
  ) {
    s.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
  }
}
+ */ + class EvalImplicitInitInstruction extends Instruction, MkZeroInitNode { + ValueEntity v; + + EvalImplicitInitInstruction() { this = MkZeroInitNode(v) } + + override Type getResultType() { result = v.getType() } + + override ControlFlow::Root getRoot() { result.isRootOf(v.getDeclaration()) } + + override int getIntValue() { + v.getType().getUnderlyingType() instanceof IntegerType and result = 0 + } + + override float getFloatValue() { + v.getType().getUnderlyingType() instanceof FloatType and result = 0.0 + } + + override string getStringValue() { + v.getType().getUnderlyingType() instanceof StringType and result = "" + } + + override boolean getBoolValue() { + v.getType().getUnderlyingType() instanceof BoolType and result = false + } + + override string getExactValue() { + result = getIntValue().toString() or + result = getFloatValue().toString() or + result = getStringValue().toString() or + result = getBoolValue().toString() + } + + override predicate isConst() { any() } + + override predicate isPlatformIndependentConstant() { any() } + + override string toString() { result = "zero value for " + v } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + v.getDeclaration().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + } + + /** + * An instruction that corresponds to the declaration of a function. + */ + class DeclareFunctionInstruction extends Instruction, MkFuncDeclNode { + FuncDecl fd; + + DeclareFunctionInstruction() { this = MkFuncDeclNode(fd) } + + override Type getResultType() { result = fd.getType() } + + override string toString() { result = fd.toString() } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + fd.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + } + + /** + * An instruction that corresponds to a `defer` statement. 
+ */ + class DeferInstruction extends Instruction, MkDeferNode { + DeferStmt defer; + + DeferInstruction() { this = MkDeferNode(defer) } + + override ControlFlow::Root getRoot() { result.isRootOf(defer) } + + override string toString() { result = defer.toString() } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + defer.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + } + + /** + * An instruction that corresponds to a `go` statement. + */ + class GoInstruction extends Instruction, MkGoNode { + GoStmt go; + + GoInstruction() { this = MkGoNode(go) } + + override ControlFlow::Root getRoot() { result.isRootOf(go) } + + override string toString() { result = go.toString() } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + go.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + } + + /** + * An instruction that corresponds to an increment or decrement statement. + */ + class IncDecInstruction extends WriteInstruction, MkIncDecNode { + IncDecStmt ids; + + IncDecInstruction() { this = MkIncDecNode(ids) } + + override Instruction getRhs() { result = MkIncDecRhs(ids) } + + override ControlFlow::Root getRoot() { result.isRootOf(ids) } + + override string toString() { result = ids.toString() } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + ids.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + } + + /** + * An instruction that computes the (implicit) right-hand side of an increment or + * decrement statement. + */ + class EvalIncDecRhsInstruction extends Instruction, MkIncDecRhs { + IncDecStmt ids; + + EvalIncDecRhsInstruction() { this = MkIncDecRhs(ids) } + + /** Gets the corresponding increment or decrement statement. 
*/ + IncDecStmt getStmt() { result = ids } + + override Type getResultType() { result = ids.getOperand().getType() } + + override ControlFlow::Root getRoot() { result.isRootOf(ids) } + + override string toString() { result = "rhs of " + ids } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + ids.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + } + + /** + * An instruction computing the implicit operand `1` in an increment or decrement statement. + */ + class EvalImplicitOneInstruction extends Instruction, MkImplicitOne { + IncDecStmt ids; + + EvalImplicitOneInstruction() { this = MkImplicitOne(ids) } + + /** Gets the corresponding increment or decrement statement. */ + IncDecStmt getStmt() { result = ids } + + override Type getResultType() { result = ids.getOperand().getType() } + + override ControlFlow::Root getRoot() { result.isRootOf(ids) } + + override int getIntValue() { result = 1 } + + override string getExactValue() { result = "1" } + + override predicate isConst() { any() } + + override predicate isPlatformIndependentConstant() { any() } + + override string toString() { result = "1" } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + ids.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + } + + /** + * An instruction corresponding to a return from a function. + */ + class ReturnInstruction extends Instruction, MkReturnNode { + ReturnStmt ret; + + ReturnInstruction() { this = MkReturnNode(ret) } + + /** Gets the corresponding `ReturnStmt`. */ + ReturnStmt getReturnStmt() { result = ret } + + /** Holds if this statement returns multiple results. */ + predicate returnsMultipleResults() { exists(MkExtractNode(ret, _)) or ret.getNumExpr() > 1 } + + /** Gets the instruction whose result is the (unique) result returned by this statement. 
*/ + Instruction getResult() { + not returnsMultipleResults() and + result = evalExprInstruction(ret.getExpr()) + } + + /** Gets the instruction whose result is the `i`th result returned by this statement. */ + Instruction getResult(int i) { + result = MkExtractNode(ret, i) + or + not exists(MkExtractNode(ret, _)) and + result = evalExprInstruction(ret.getExpr(i)) + } + + override ControlFlow::Root getRoot() { result.isRootOf(ret) } + + override string toString() { result = ret.toString() } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + ret.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + } + + /** + * An instruction that represents the implicit assignment to a result variable + * performed by a return statement. + */ + class WriteResultInstruction extends WriteInstruction, MkResultWriteNode { + ResultVariable var; + int i; + ReturnInstruction ret; + + WriteResultInstruction() { + exists(ReturnStmt retstmt | + this = MkResultWriteNode(var, i, retstmt) and + ret = MkReturnNode(retstmt) + ) + } + + override Instruction getRhs() { result = ret.getResult(i) } + + /** Gets the result variable being assigned. */ + ResultVariable getResultVariable() { result = var } + + override Type getResultType() { result = var.getType() } + + override ControlFlow::Root getRoot() { var = result.(FuncDef).getAResultVar() } + + override string toString() { result = "implicit write of " + var } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + ret.getResult(i).hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + } + + /** + * An instruction that reads the final value of a result variable upon returning + * from a function. 
+ */ + class ReadResultInstruction extends Instruction, MkResultReadNode { + ResultVariable var; + + ReadResultInstruction() { this = MkResultReadNode(var) } + + override predicate reads(ValueEntity v) { v = var } + + override Type getResultType() { result = var.getType() } + + override ControlFlow::Root getRoot() { var = result.(FuncDef).getAResultVar() } + + override string toString() { result = "implicit read of " + var } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + var.getDeclaration().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + } + + /** + * An instruction corresponding to a `select` statement. + */ + class SelectInstruction extends Instruction, MkSelectNode { + SelectStmt sel; + + SelectInstruction() { this = MkSelectNode(sel) } + + override ControlFlow::Root getRoot() { result.isRootOf(sel) } + + override string toString() { result = sel.toString() } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + sel.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + } + + /** + * An instruction corresponding to a send statement. + */ + class SendInstruction extends Instruction, MkSendNode { + SendStmt send; + + SendInstruction() { this = MkSendNode(send) } + + override ControlFlow::Root getRoot() { result.isRootOf(send) } + + override string toString() { result = send.toString() } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + send.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + } + + /** + * An instruction initializing a parameter to the corresponding argument. 
+ */ + class InitParameterInstruction extends WriteInstruction, MkParameterInit { + Parameter parm; + + InitParameterInstruction() { this = MkParameterInit(parm) } + + override Instruction getRhs() { result = MkArgumentNode(parm) } + + override ControlFlow::Root getRoot() { result = parm.getFunction() } + + override string toString() { result = "initialization of " + parm } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + parm.getDeclaration().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + } + + /** + * An instruction reading the value of a function argument. + */ + class ReadArgumentInstruction extends Instruction, MkArgumentNode { + Parameter parm; + + ReadArgumentInstruction() { this = MkArgumentNode(parm) } + + override Type getResultType() { result = parm.getType() } + + override ControlFlow::Root getRoot() { result = parm.getFunction() } + + override string toString() { result = "argument corresponding to " + parm } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + parm.getDeclaration().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + } + + /** + * An instruction initializing a result variable to its zero value. 
+ */ + class InitResultInstruction extends WriteInstruction, MkResultInit { + ResultVariable res; + + InitResultInstruction() { this = MkResultInit(res) } + + override Instruction getRhs() { result = MkZeroInitNode(res) } + + override ControlFlow::Root getRoot() { result = res.getFunction() } + + override string toString() { result = "initialization of " + res } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + res.getDeclaration().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + } + + /** + * An instruction that gets the next key-value pair in a range loop. + */ + class GetNextEntryInstruction extends Instruction, MkNextNode { + RangeStmt rs; + + GetNextEntryInstruction() { this = MkNextNode(rs) } + + /** + * Gets the instruction computing the value whose key-value pairs this instruction reads. + */ + Instruction getDomain() { result = evalExprInstruction(rs.getDomain()) } + + override ControlFlow::Root getRoot() { result.isRootOf(rs) } + + override string toString() { result = "next key-value pair in range" } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + rs.getDomain().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + } + + /** + * An instruction computing the implicit `true` value in an expression-less `switch` statement. 
+ */ + class EvalImplicitTrueInstruction extends Instruction, MkImplicitTrue { + Stmt stmt; + + EvalImplicitTrueInstruction() { this = MkImplicitTrue(stmt) } + + override Type getResultType() { result instanceof BoolType } + + override ControlFlow::Root getRoot() { result.isRootOf(stmt) } + + override boolean getBoolValue() { result = true } + + override string getExactValue() { result = "true" } + + override predicate isConst() { any() } + + override predicate isPlatformIndependentConstant() { any() } + + override string toString() { result = "true" } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + stmt.hasLocationInfo(filepath, startline, startcolumn, _, _) and + endline = startline and + endcolumn = startcolumn + } + } + + /** + * An instruction corresponding to the implicit comparison or type check performed by an + * expression in a `case` clause. + * + * For example, consider this `switch` statement: + * + * ```go + * switch x { + * case 2, y+1: + * ... + * } + * ``` + * + * The expressions `2` and `y+1` are implicitly compared to `x`. These comparisons are + * represented by case instructions. + */ + class CaseInstruction extends Instruction, MkCaseCheckNode { + CaseClause cc; + int i; + + CaseInstruction() { this = MkCaseCheckNode(cc, i) } + + override ControlFlow::Root getRoot() { result.isRootOf(cc) } + + override string toString() { result = "case " + cc.getExpr(i) } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + cc.getExpr(i).hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + } + + /** + * An instruction computing the implicit lower slice bound of zero in a slice expression without + * an explicit lower bound. 
+ */ + class EvalImplicitLowerSliceBoundInstruction extends Instruction, MkImplicitLowerSliceBound { + SliceExpr slice; + + EvalImplicitLowerSliceBoundInstruction() { this = MkImplicitLowerSliceBound(slice) } + + override Type getResultType() { result instanceof IntType } + + override ControlFlow::Root getRoot() { result.isRootOf(slice) } + + override int getIntValue() { result = 0 } + + override string getExactValue() { result = "0" } + + override predicate isConst() { any() } + + override predicate isPlatformIndependentConstant() { any() } + + override string toString() { result = "0" } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + slice.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + } + + /** + * An instruction computing the implicit upper slice bound in a slice expression without an + * explicit upper bound. + */ + class EvalImplicitUpperSliceBoundInstruction extends Instruction, MkImplicitUpperSliceBound { + SliceExpr slice; + + EvalImplicitUpperSliceBoundInstruction() { this = MkImplicitUpperSliceBound(slice) } + + override ControlFlow::Root getRoot() { result.isRootOf(slice) } + + override Type getResultType() { result instanceof IntType } + + override string toString() { result = "len" } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + slice.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + } + + /** + * An instruction computing the implicit maximum slice bound in a slice expression without an + * explicit maximum bound. 
+ */ + class EvalImplicitMaxSliceBoundInstruction extends Instruction, MkImplicitMaxSliceBound { + SliceExpr slice; + + EvalImplicitMaxSliceBoundInstruction() { this = MkImplicitMaxSliceBound(slice) } + + override ControlFlow::Root getRoot() { result.isRootOf(slice) } + + override Type getResultType() { result instanceof IntType } + + override string toString() { result = "cap" } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + slice.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + } + + /** + * An instruction implicitly dereferencing the base in a field or method reference through a + * pointer, or the base in an element or slice reference through a pointer. + */ + class EvalImplicitDerefInstruction extends Instruction, MkImplicitDeref { + Expr e; + + EvalImplicitDerefInstruction() { this = MkImplicitDeref(e) } + + /** Gets the operand that is being dereferenced. */ + Expr getOperand() { result = e } + + override Type getResultType() { + result = e.getType().getUnderlyingType().(PointerType).getBaseType() + } + + override ControlFlow::Root getRoot() { result.isRootOf(e) } + + override string toString() { result = "implicit dereference" } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + e.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + } + + /** A representation of the target of a write instruction. */ + class WriteTarget extends TWriteTarget { + ControlFlow::Node w; + + WriteTarget() { + this = MkLhs(w, _) or this = MkLiteralElementTarget(w) or this = MkResultWriteTarget(w) + } + + /** Gets the write instruction of which this is the target. */ + WriteInstruction getWrite() { result = w } + + /** Gets the name of the variable or field being written to, if any. */ + string getName() { none() } + + /** Gets the SSA variable being written to, if any. 
*/ + SsaVariable asSsaVariable() { + getWrite() = result.getDefinition().(SsaExplicitDefinition).getInstruction() + } + + /** Holds if `e` is the variable or field being written to. */ + predicate refersTo(ValueEntity e) { none() } + + /** Gets a textual representation of this target. */ + string toString() { result = "write target" } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + filepath = "" and startline = 0 and startcolumn = 0 and endline = 0 and endcolumn = 0 + } + } + + /** A reference to a variable or constant, used as the target of a write. */ + class VarOrConstTarget extends WriteTarget { + Expr loc; + + VarOrConstTarget() { + this = MkLhs(_, loc) and + ( + loc instanceof Ident + or + loc instanceof SelectorExpr and + not loc.(SelectorExpr).getBase() instanceof ReferenceExpr + ) + or + exists(WriteResultInstruction wr | + this = MkResultWriteTarget(wr) and + evalExprInstruction(loc) = wr.getRhs() + ) + } + + override predicate refersTo(ValueEntity e) { + this instanceof MkLhs and + loc = e.getAReference() + or + exists(WriteResultInstruction wr | this = MkResultWriteTarget(wr) | + e = wr.getResultVariable() + ) + } + + override string getName() { + this = MkLhs(_, loc) and + ( + result = loc.(Ident).getName() + or + result = loc.(SelectorExpr).getSelector().getName() + ) + or + exists(WriteResultInstruction wr | this = MkResultWriteTarget(wr) | + result = wr.getResultVariable().getName() + ) + } + + /** Gets the variable this refers to, if any. */ + Variable getVariable() { refersTo(result) } + + /** Gets the constant this refers to, if any. 
*/ + Constant getConstant() { refersTo(result) } + + override string toString() { result = getName() } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + loc.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + } + + /** A reference to a field, used as the target of a write. */ + class FieldTarget extends WriteTarget { + FieldTarget() { + exists(SelectorExpr sel | this = MkLhs(_, sel) | sel.getBase() instanceof ValueExpr) + or + w instanceof InitLiteralStructFieldInstruction + } + + /** Gets the instruction computing the base value on which this field is accessed. */ + Instruction getBase() { + exists(SelectorExpr sel | this = MkLhs(_, sel) | result = selectorBase(sel)) + or + result = w.(InitLiteralStructFieldInstruction).getBase() + } + + /** Gets the type of the base of this field access, that is, the type that contains the field. */ + Type getBaseType() { result = this.getBase().getResultType() } + + override predicate refersTo(ValueEntity e) { + exists(SelectorExpr sel | this = MkLhs(_, sel) | sel.uses(e)) + or + e = w.(InitLiteralStructFieldInstruction).getField() + } + + override string getName() { exists(Field f | this.refersTo(f) | result = f.getName()) } + + /** Gets the field this refers to, if it can be determined.
*/ + Field getField() { refersTo(result) } + + override string toString() { + exists(SelectorExpr sel | this = MkLhs(_, sel) | + result = "field " + sel.getSelector().getName() + ) + or + result = "field " + w.(InitLiteralStructFieldInstruction).getFieldName() + } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + exists(SelectorExpr sel | this = MkLhs(_, sel) | + sel.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + ) + or + w.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + } + + /** + * A reference to an element of an array, slice or map, used as the target of a write. + */ + class ElementTarget extends WriteTarget { + ElementTarget() { + this = MkLhs(_, any(IndexExpr idx)) + or + w instanceof InitLiteralElementInstruction + } + + /** Gets the instruction computing the base value of this element reference. */ + Instruction getBase() { + exists(IndexExpr idx | this = MkLhs(_, idx) | result = selectorBase(idx)) + or + result = w.(InitLiteralComponentInstruction).getBase() + } + + /** Gets the instruction computing the index of this element reference. */ + Instruction getIndex() { + exists(IndexExpr idx | this = MkLhs(_, idx) | result = evalExprInstruction(idx.getIndex())) + or + result = w.(InitLiteralElementInstruction).getIndex() + } + + override string toString() { result = "element" } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + exists(IndexExpr idx | this = MkLhs(_, idx) | + idx.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + ) + or + w.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + } + + /** + * A pointer dereference, used as the target of a write. 
+ */ + class PointerTarget extends WriteTarget { + Expr lhs; + + PointerTarget() { + this = MkLhs(_, lhs) and + (lhs instanceof StarExpr or lhs instanceof DerefExpr) + } + + /** Gets the instruction computing the pointer value being dereferenced. */ + Instruction getBase() { + exists(Expr base | base = lhs.(StarExpr).getBase() or base = lhs.(DerefExpr).getOperand() | + result = evalExprInstruction(base) + ) + } + + override string toString() { result = lhs.toString() } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + lhs.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + } + + /** + * Gets the (final) instruction computing the value of `e`. + * + * Note that some expressions (such as type expressions or labels) have no corresponding + * instruction, so this predicate is undefined for them. + * + * Short-circuiting expressions that are purely used for control flow (meaning that their + * value is not stored in a variable or used to compute the value of a non-shortcircuiting + * expression) do not have a final instruction either. + */ + Instruction evalExprInstruction(Expr e) { + result = MkExprNode(e) or + result = evalExprInstruction(e.(ParenExpr).getExpr()) + } + + /** + * Gets the instruction corresponding to the initialization of `r`. + */ + InitParameterInstruction initRecvInstruction(ReceiverVariable r) { result = MkParameterInit(r) } + + /** + * Gets the instruction corresponding to the initialization of `p`. + */ + InitParameterInstruction initParamInstruction(Parameter p) { result = MkParameterInit(p) } + + /** + * Gets the instruction corresponding to the `i`th assignment happening at + * `assgn` (0-based). + */ + AssignInstruction assignInstruction(Assignment assgn, int i) { result = MkAssignNode(assgn, i) } + + /** + * Gets the instruction corresponding to the `i`th initialization happening + * at `spec` (0-based). 
+ */ + AssignInstruction initInstruction(ValueSpec spec, int i) { result = MkAssignNode(spec, i) } + + /** + * Gets the instruction corresponding to the assignment of the key variable + * of range statement `rs`. + */ + AssignInstruction assignKeyInstruction(RangeStmt rs) { result = MkAssignNode(rs, 0) } + + /** + * Gets the instruction corresponding to the assignment of the value variable + * of range statement `rs`. + */ + AssignInstruction assignValueInstruction(RangeStmt rs) { result = MkAssignNode(rs, 1) } + + /** + * Gets the instruction corresponding to the implicit initialization of `v` + * to its zero value. + */ + EvalImplicitInitInstruction implicitInitInstruction(ValueEntity v) { result = MkZeroInitNode(v) } + + /** + * Gets the instruction corresponding to the extraction of the `idx`th element + * of the tuple produced by `base`. + */ + ExtractTupleElementInstruction extractTupleElement(Instruction base, int idx) { + result.extractsElement(base, idx) + } + + /** + * Gets the instruction corresponding to the implicit lower bound of slice `e`, if any. + */ + EvalImplicitLowerSliceBoundInstruction implicitLowerSliceBoundInstruction(SliceExpr e) { + result = MkImplicitLowerSliceBound(e) + } + + /** + * Gets the instruction corresponding to the implicit upper bound of slice `e`, if any. + */ + EvalImplicitUpperSliceBoundInstruction implicitUpperSliceBoundInstruction(SliceExpr e) { + result = MkImplicitUpperSliceBound(e) + } + + /** + * Gets the instruction corresponding to the implicit maximum bound of slice `e`, if any. + */ + EvalImplicitMaxSliceBoundInstruction implicitMaxSliceBoundInstruction(SliceExpr e) { + result = MkImplicitMaxSliceBound(e) + } + + /** + * Gets the implicit dereference instruction for `e`, where `e` is a pointer used as the base + * in a field/method access, element access, or slice expression. 
+ */ + EvalImplicitDerefInstruction implicitDerefInstruction(Expr e) { result = MkImplicitDeref(e) } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/DataFlow.qll b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/DataFlow.qll new file mode 100644 index 00000000000..d99ce3bb554 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/DataFlow.qll @@ -0,0 +1,29 @@ +/** + * Provides a library for local (intra-procedural) and global (inter-procedural) + * data flow analysis: deciding whether data can flow from a _source_ to a + * _sink_. + * + * Unless configured otherwise, _flow_ means that the exact value of + * the source may reach the sink. We do not track flow across pointer + * dereferences or array indexing. To track these types of flow, where the + * exact value may not be preserved, import + * `semmle.go.dataflow.TaintTracking`. + * + * To use global (interprocedural) data flow, extend the class + * `DataFlow::Configuration` as documented on that class. To use local + * (intraprocedural) data flow, invoke `DataFlow::localFlow` or + * `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`. + */ + +import go + +/** + * Provides a library for local (intra-procedural) and global (inter-procedural) + * data flow analysis. + */ +module DataFlow { + import semmle.go.dataflow.internal.DataFlowImpl + import Properties +} + +class Read = DataFlow::ReadNode; diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/DataFlow2.qll b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/DataFlow2.qll new file mode 100644 index 00000000000..a2bae8bd939 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/DataFlow2.qll @@ -0,0 +1,27 @@ +/** + * Provides a library for local (intra-procedural) and global (inter-procedural) + * data flow analysis: deciding whether data can flow from a _source_ to a + * _sink_. + * + * Unless configured otherwise, _flow_ means that the exact value of + * the source may reach the sink.
We do not track flow across pointer + * dereferences or array indexing. To track these types of flow, where the + * exact value may not be preserved, import + * `semmle.go.dataflow.TaintTracking`. + * + * To use global (interprocedural) data flow, extend the class + * `DataFlow::Configuration` as documented on that class. To use local + * (intraprocedural) data flow, invoke `DataFlow::localFlow` or + * `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`. + */ + +import go + +/** + * Provides a library for local (intra-procedural) and global (inter-procedural) + * data flow analysis. + */ +module DataFlow2 { + import semmle.go.dataflow.internal.DataFlowImpl2 + import Properties +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/FunctionInputsAndOutputs.qll b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/FunctionInputsAndOutputs.qll new file mode 100644 index 00000000000..c1653f5b3ad --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/FunctionInputsAndOutputs.qll @@ -0,0 +1,310 @@ +/** + * Provides QL classes for indicating data flow through a function parameter, return value, + * or receiver. + */ + +import go +private import semmle.go.dataflow.internal.DataFlowPrivate + +/** + * An abstract representation of an input to a function: a parameter, the receiver, or a + * result (result index `-1` stands for the one and only result). + */ +private newtype TFunctionInput = + TInParameter(int i) { exists(SignatureType s | exists(s.getParameterType(i))) } or + TInReceiver() or + TInResult(int index) { + // the one and only result + index = -1 + or + // one among several results + exists(SignatureType s | exists(s.getResultType(index))) + } + +/** + * An abstract representation of an input to a function, which is a parameter, the + * receiver, or a result. + */ +class FunctionInput extends TFunctionInput { + /** Holds if this represents the `i`th parameter of a function.
*/ + predicate isParameter(int i) { none() } + + /** Holds if this represents the receiver of a function. */ + predicate isReceiver() { none() } + + /** Holds if this represents the result of a function. */ + predicate isResult() { none() } + + /** Holds if this represents the `i`th result of a function. */ + predicate isResult(int i) { none() } + + /** Gets the data-flow node corresponding to this input for the call `c`. */ + final DataFlow::Node getNode(DataFlow::CallNode c) { result = getEntryNode(c) } + + /** Gets the data-flow node through which data is passed into this input for the call `c`. */ + abstract DataFlow::Node getEntryNode(DataFlow::CallNode c); + + /** Gets the data-flow node through which data from this input enters function `f`. */ + abstract DataFlow::Node getExitNode(FuncDef f); + + /** Gets a textual representation of this element. */ + abstract string toString(); +} + +/** Defines convenience methods that get particular `FunctionInput` instances. */ +module FunctionInput { + /** Gets a `FunctionInput` representing the `i`th parameter. */ + FunctionInput parameter(int i) { result.isParameter(i) } + + /** Gets a `FunctionInput` representing the receiver. */ + FunctionInput receiver() { result.isReceiver() } + + /** Gets a `FunctionInput` representing the result of a single-result function. */ + FunctionInput functionResult() { result.isResult() } + + /** Gets a `FunctionInput` representing the `i`th result. */ + FunctionInput functionResult(int i) { result.isResult(i) } +} + +/** A parameter position of a function, viewed as a source of input. 
*/ +private class ParameterInput extends FunctionInput, TInParameter { + int index; + + ParameterInput() { this = TInParameter(index) } + + override predicate isParameter(int i) { i = index } + + override DataFlow::Node getEntryNode(DataFlow::CallNode c) { result = c.getArgument(index) } + + override DataFlow::Node getExitNode(FuncDef f) { + result = DataFlow::parameterNode(f.getParameter(index)) + } + + override string toString() { result = "parameter " + index } +} + +/** The receiver of a function, viewed as a source of input. */ +private class ReceiverInput extends FunctionInput, TInReceiver { + override predicate isReceiver() { any() } + + override DataFlow::Node getEntryNode(DataFlow::CallNode c) { + result = c.(DataFlow::MethodCallNode).getReceiver() + } + + override DataFlow::Node getExitNode(FuncDef f) { + result = DataFlow::receiverNode(f.(MethodDecl).getReceiver()) + } + + override string toString() { result = "receiver" } +} + +/** + * A result position of a function, viewed as an input. + * + * Results are usually outputs rather than inputs, but for taint tracking it can be useful to + * think of taint propagating backwards from a result of a function to its arguments. For instance, + * the function `bufio.NewWriter` returns a writer `bw` that buffers write operations to an + * underlying writer `w`. If tainted data is written to `bw`, then it makes sense to propagate + * that taint back to the underlying writer `w`, which can be modeled by saying that + * `bufio.NewWriter` propagates taint from its result to its first argument. 
+ */ +private class ResultInput extends FunctionInput, TInResult { + int index; + + ResultInput() { this = TInResult(index) } + + override predicate isResult() { index = -1 } + + override predicate isResult(int i) { + i = 0 and isResult() + or + i = index and i >= 0 + } + + override DataFlow::Node getEntryNode(DataFlow::CallNode c) { + exists(DataFlow::Node pred | + index = -1 and + pred = c.getResult() + or + index >= 0 and + pred = c.getResult(index) + | + // if the result is assigned to an SSA variable, we want to propagate mutations backwards + // through that variable + exists(DataFlow::SsaNode ssa | ssa.getInit() = pred | result = ssa) + or + // otherwise the entry node is simply the result + not exists(DataFlow::SsaNode ssa | ssa.getInit() = pred) and + result = pred + ) + } + + override DataFlow::Node getExitNode(FuncDef f) { none() } + + override string toString() { + index = -1 and result = "result" + or + index >= 0 and result = "result " + index + } +} + +/** + * An abstract representation of an output of a function: a result, the receiver, or a parameter. + */ +private newtype TFunctionOutput = + TOutResult(int index) { + // the one and only result + index = -1 + or + // one among several results + exists(SignatureType s | exists(s.getResultType(index))) + } or + TOutReceiver() or + TOutParameter(int index) { exists(SignatureType s | exists(s.getParameterType(index))) } + +/** + * An abstract representation of an output of a function, which is one of its results, + * its receiver, or a parameter with mutable type. + */ +class FunctionOutput extends TFunctionOutput { + /** Holds if this represents the (single) result of a function. */ + predicate isResult() { none() } + + /** Holds if this represents the `i`th result of a function. */ + predicate isResult(int i) { none() } + + /** Holds if this represents the receiver of a function. */ + predicate isReceiver() { none() } + + /** Holds if this represents the `i`th parameter of a function. 
*/ + predicate isParameter(int i) { none() } + + /** Gets the data-flow node corresponding to this output for the call `c`. */ + final DataFlow::Node getNode(DataFlow::CallNode c) { result = getExitNode(c) } + + /** Gets the data-flow node through which data is passed into this output for the function `f`. */ + abstract DataFlow::Node getEntryNode(FuncDef f); + + /** Gets the data-flow node through which data is returned from this output for the call `c`. */ + abstract DataFlow::Node getExitNode(DataFlow::CallNode c); + + /** Gets a textual representation of this element. */ + abstract string toString(); +} + +/** Defines convenience methods that get particular `FunctionOutput` instances. */ +module FunctionOutput { + /** Gets a `FunctionOutput` representing the result of a single-result function. */ + FunctionOutput functionResult() { result.isResult() } + + /** Gets a `FunctionOutput` representing the `i`th result. */ + FunctionOutput functionResult(int i) { result.isResult(i) } + + /** Gets a `FunctionOutput` representing the receiver after a function returns. */ + FunctionOutput receiver() { result.isReceiver() } + + /** Gets a `FunctionOutput` representing the `i`th parameter after a function returns. */ + FunctionOutput parameter(int i) { result.isParameter(i) } +} + +/** A result position of a function, viewed as an output. 
*/ +private class OutResult extends FunctionOutput, TOutResult { + int index; + + OutResult() { this = TOutResult(index) } + + override predicate isResult() { index = -1 } + + override predicate isResult(int i) { + i = 0 and isResult() + or + i = index and i >= 0 + } + + override DataFlow::Node getEntryNode(FuncDef f) { + // return expressions + exists(IR::ReturnInstruction ret | f = ret.getRoot() | + index = -1 and + result = DataFlow::instructionNode(ret.getResult()) + or + index >= 0 and + ret.returnsMultipleResults() and + result = DataFlow::instructionNode(ret.getResult(index)) + ) + or + // expressions assigned to result variables + exists(Write w, int nr | nr = f.getType().getNumResult() | + index = -1 and + nr = 1 and + w.writes(f.getResultVar(0), result) + or + index >= 0 and + nr > 1 and + w.writes(f.getResultVar(index), result) + ) + } + + override DataFlow::Node getExitNode(DataFlow::CallNode c) { + index = -1 and result = c.getResult() + or + result = c.getResult(index) + } + + override string toString() { + index = -1 and result = "result" + or + index >= 0 and result = "result " + index + } +} + +/** The receiver of a function, viewed as an output. */ +private class OutReceiver extends FunctionOutput, TOutReceiver { + override predicate isReceiver() { any() } + + override DataFlow::Node getEntryNode(FuncDef f) { + // there is no generic way of assigning to a receiver; operations that taint a receiver + // have to be handled on a case-by-case basis + none() + } + + override DataFlow::Node getExitNode(DataFlow::CallNode c) { + exists(DataFlow::Node arg | + arg = getArgument(c, -1) and + result.(DataFlow::PostUpdateNode).getPreUpdateNode() = arg + ) + } + + override string toString() { result = "receiver" } +} + +/** + * A parameter of a function, viewed as an output. 
+ * + * Note that slices passed to varargs parameters using `...` are not included, since in this + * case it is ambiguous whether the output should be the slice itself or one of its elements. + */ +private class OutParameter extends FunctionOutput, TOutParameter { + int index; + + OutParameter() { this = TOutParameter(index) } + + override predicate isParameter(int i) { i = index } + + override DataFlow::Node getEntryNode(FuncDef f) { + // there is no generic way of assigning to a parameter; operations that taint a parameter + // have to be handled on a case-by-case basis + none() + } + + override DataFlow::Node getExitNode(DataFlow::CallNode c) { + exists(DataFlow::Node arg | + arg = getArgument(c, index) and + // exclude slices passed to varargs parameters using `...` calls + not (c.hasEllipsis() and index = c.getNumArgument() - 1) + | + result.(DataFlow::PostUpdateNode).getPreUpdateNode() = arg + ) + } + + override string toString() { result = "parameter " + index } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/GlobalValueNumbering.qll b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/GlobalValueNumbering.qll new file mode 100644 index 00000000000..c9c0c58b33e --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/GlobalValueNumbering.qll @@ -0,0 +1,591 @@ +/** + * Provides an implementation of Global Value Numbering. + * See https://en.wikipedia.org/wiki/Global_value_numbering + * + * The predicate `globalValueNumber` converts an expression into a `GVN`, + * which is an abstract type representing the value of the expression. If + * two expressions have the same `GVN` then they compute the same value. + * For example: + * + * ``` + * func f(x int, y int) { + * g(x+y, x+y); + * } + * ``` + * + * In this example, both arguments in the call to `g` compute the same value, + * so both arguments have the same `GVN`. 
In other words, we can find + * this call with the following query: + * + * ``` + * from DataFlow::CallNode call, GVN v + * where v = globalValueNumber(call.getArgument(0)) + * and v = globalValueNumber(call.getArgument(1)) + * select call + * ``` + * + * The analysis is conservative, so two expressions might have different + * `GVN`s even though they actually always compute the same value. The most + * common reason for this is that the analysis cannot prove that there + * are no side-effects that might cause the computed value to change. + */ + +/* + * Note to developers: the correctness of this module depends on the + * definitions of GVN, globalValueNumber, and analyzableExpr being kept in + * sync with each other. If you change this module then make sure that the + * change is symmetric across all three. + */ + +import go + +/** + * Holds if the result is a control flow node that might change the + * value of any package variable. This is used in the implementation + * of `MkOtherVariable`, because we need to be quite conservative when + * we assign a value number to a package variable. For example: + * + * ``` + * x = g+1; + * dosomething(); + * y = g+1; + * ``` + * + * It is not safe to assign the same value number to both instances + * of `g+1` in this example, because the call to `dosomething` might + * change the value of `g`. + */ +private ControlFlow::Node nodeWithPossibleSideEffect() { + exists(DataFlow::CallNode call | + call.getCall().mayHaveOwnSideEffects() and + not isPureFn(call.getTarget()) and + result = call.asInstruction() + ) + or + // If the lhs of an assignment is not analyzable by SSA, then + // we need to treat the assignment as having a possible side-effect. 
+ result instanceof Write and + not exists(SsaExplicitDefinition ssa | result = ssa.getInstruction()) +} + +private predicate isPureFn(Function f) { + f.(BuiltinFunction).isPure() + or + isPureStmt(f.(DeclaredFunction).getBody()) +} + +private predicate isPureStmt(Stmt s) { + exists(BlockStmt blk | blk = s | forall(Stmt ch | ch = blk.getAStmt() | isPureStmt(ch))) + or + isPureExpr(s.(ReturnStmt).getExpr()) +} + +private predicate isPureExpr(Expr e) { + e instanceof BasicLit + or + exists(FuncDef f | f = e.getEnclosingFunction() | + e = f.getAParameter().getAReference() + or + e = f.(MethodDecl).getReceiver().getAReference() + ) + or + isPureExpr(e.(SelectorExpr).getBase()) + or + exists(CallExpr ce | e = ce | + isPureFn(ce.getTarget()) and + forall(Expr arg | arg = ce.getAnArgument() | isPureExpr(arg)) + ) +} + +/** + * Gets the entry node of the control flow graph of which `node` is a + * member. + */ +private ControlFlow::Node getControlFlowEntry(ControlFlow::Node node) { + result = node.getRoot().getEntryNode() +} + +private predicate entryNode(ControlFlow::Node node) { node.isEntryNode() } + +/** + * Holds if there is a control flow edge from `src` to `dst` or + * if `dst` is an expression with a possible side-effect. The idea + * is to treat side effects as entry points in the control flow + * graph so that we can use the dominator tree to find the most recent + * side-effect. + */ +private predicate sideEffectCFG(ControlFlow::Node src, ControlFlow::Node dst) { + src.getASuccessor() = dst + or + // Add an edge from the entry point to any node that might have a side + // effect. + dst = nodeWithPossibleSideEffect() and + src = getControlFlowEntry(dst) +} + +/** + * Holds if `dominator` is the immediate dominator of `node` in + * the side-effect CFG. + */ +private predicate iDomEffect(ControlFlow::Node dominator, ControlFlow::Node node) = + idominance(entryNode/1, sideEffectCFG/2)(_, dominator, node) + +/** + * Gets the most recent side effect. 
To be more precise, `result` is a + * dominator of `node` and no side-effects can occur between `result` and + * `node`. + * + * `sideEffectCFG` has an edge from the function entry to every node with a + * side-effect. This means that every node with a side-effect has the + * function entry as its immediate dominator. So if node `x` dominates node + * `y` then there can be no side effects between `x` and `y` unless `x` is + * the function entry. So the optimal choice for `result` has the function + * entry as its immediate dominator. + * + * Example: + * + * ``` + * 000: int f(int a, int b, int *p) { + * 001: int r = 0; + * 002: if (a) { + * 003: if (b) { + * 004: sideEffect1(); + * 005: } + * 006: } else { + * 007: sideEffect2(); + * 008: } + * 009: if (a) { + * 010: r++; // Not a side-effect, because r is an SSA variable. + * 011: } + * 012: if (b) { + * 013: r++; // Not a side-effect, because r is an SSA variable. + * 014: } + * 015: return *p; + * 016: } + * ``` + * + * Suppose we want to find the most recent side-effect for the dereference + * of `p` on line 015. The `sideEffectCFG` has an edge from the function + * entry (line 000) to the side effects at lines 004 and 007. Therefore, + * the immediate dominator tree looks like this: + * + * 000 - 001 - 002 - 003 + * - 004 + * - 007 + * - 009 - 010 + * - 012 - 013 + * - 015 + * + * The immediate dominator path to line 015 is 000 - 009 - 012 - 015. + * Therefore, the most recent side effect for line 015 is line 009. + */ +cached +private ControlFlow::Node mostRecentSideEffect(ControlFlow::Node node) { + exists(ControlFlow::Node entry | + entryNode(entry) and + iDomEffect(entry, result) and + iDomEffect*(result, node) + ) +} + +/** Used to represent the "global value number" of an expression. 
*/ +cached +private newtype GVNBase = + MkNumericConst(string val) { mkNumericConst(_, val) } or + MkStringConst(string val) { mkStringConst(_, val) } or + MkBoolConst(boolean val) { mkBoolConst(_, val) } or + MkIndirectSsa(SsaDefinition def) { not ssaInit(def, _) } or + MkFunc(Function fn) { mkFunc(_, fn) } or + // Variables with no SSA information. As a crude (but safe) + // approximation, we use `mostRecentSideEffect` to compute a definition + // location for the variable. This ensures that two instances of the same + // global variable will only get the same value number if they are + // guaranteed to have the same value. + MkOtherVariable(ValueEntity x, ControlFlow::Node dominator) { mkOtherVariable(_, x, dominator) } or + MkMethodAccess(GVN base, Function m) { mkMethodAccess(_, base, m) } or + MkFieldRead(GVN base, Field f, ControlFlow::Node dominator) { mkFieldRead(_, base, f, dominator) } or + MkPureCall(Function f, GVN callee, GVNList args) { mkPureCall(_, f, callee, args) } or + MkIndex(GVN base, GVN index, ControlFlow::Node dominator) { mkIndex(_, base, index, dominator) } or + // Dereference a pointer. The value might have changed since the last + // time the pointer was dereferenced, so we need to include a definition + // location. As a crude (but safe) approximation, we use + // `mostRecentSideEffect` to compute a definition location. + MkDeref(GVN base, ControlFlow::Node dominator) { mkDeref(_, base, dominator) } or + MkBinaryOp(GVN lhs, GVN rhs, string op) { mkBinaryOp(_, lhs, rhs, op) } or + MkUnaryOp(GVN child, string op) { mkUnaryOp(_, child, op) } or + // Any expression that is not handled by the cases above is + // given a unique number based on the expression itself. 
+ MkUnanalyzable(DataFlow::Node e) { not analyzableExpr(e) } + +private newtype GVNList = + MkNil() or + MkCons(GVN head, GVNList tail) { globalValueNumbers(_, _, head, tail) } + +private GVNList globalValueNumbers(DataFlow::CallNode ce, int start) { + analyzableCall(ce, _) and + start = ce.getNumArgument() and + result = MkNil() + or + exists(GVN head, GVNList tail | + globalValueNumbers(ce, start, head, tail) and + result = MkCons(head, tail) + ) +} + +private predicate globalValueNumbers(DataFlow::CallNode ce, int start, GVN head, GVNList tail) { + analyzableCall(ce, _) and + head = globalValueNumber(ce.getArgument(start)) and + tail = globalValueNumbers(ce, start + 1) +} + +/** + * A Global Value Number. A GVN is an abstract representation of the value + * computed by an expression. The relationship between `DataFlow::Node` and `GVN` is + * many-to-one: every `DataFlow::Node` has exactly one `GVN`, but multiple + * nodes can have the same `GVN`. If two nodes have the same + * `GVN`, it means that they compute the same value at run time. The `GVN` + * is an opaque value, so you cannot deduce what the run-time value of an + * expression will be from its `GVN`. The only use for the `GVN` of an + * expression is to find other expressions that compute the same value. + * Use the predicate `globalValueNumber` to get the `GVN` for a `DataFlow::Node`. + * + * Note: `GVN` has `toString` and `hasLocationInfo` members, so that it can be + * displayed in a results list. These work by picking an arbitrary + * data-flow node with this `GVN` and using its `toString` and `hasLocationInfo` + * members. + */ +class GVN extends GVNBase { + GVN() { this instanceof GVNBase } + + /** Gets a data-flow node that has this GVN. */ + DataFlow::Node getANode() { this = globalValueNumber(result) } + + /** Gets the kind of the GVN. This can be useful for debugging. 
*/ + string getKind() { + this instanceof MkNumericConst and result = "NumericConst" + or + this instanceof MkStringConst and result = "StringConst" + or + this instanceof MkBoolConst and result = "BoolConst" + or + this instanceof MkIndirectSsa and result = "IndirectSsa" + or + this instanceof MkFunc and result = "Func" + or + this instanceof MkOtherVariable and result = "OtherVariable" + or + this instanceof MkMethodAccess and result = "MethodAccess" + or + this instanceof MkFieldRead and result = "FieldRead" + or + this instanceof MkPureCall and result = "PureCall" + or + this instanceof MkIndex and result = "Index" + or + this instanceof MkDeref and result = "Deref" + or + this instanceof MkBinaryOp and result = "BinaryOp" + or + this instanceof MkUnaryOp and result = "UnaryOp" + or + this instanceof MkUnanalyzable and result = "Unanalyzable" + } + + /** + * Gets an example of a data-flow node with this GVN. + * This is useful for things like implementing toString(). + */ + private DataFlow::Node exampleNode() { + // Pick the expression with the minimum source location. This is + // just an arbitrary way to pick an expression with this `GVN`. + result = + min(DataFlow::Node e, string f, int l, int c, string k | + e = getANode() and e.hasLocationInfo(f, l, c, _, _) and k = e.getNodeKind() + | + e order by f, l, c, k + ) + } + + /** Gets a textual representation of this element. */ + string toString() { result = exampleNode().toString() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). 
+ */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + exampleNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +private predicate mkNumericConst(DataFlow::Node nd, string val) { + nd.getType().getUnderlyingType() instanceof NumericType and + val = nd.getExactValue() and + nd.isPlatformIndependentConstant() +} + +private predicate mkStringConst(DataFlow::Node nd, string val) { + val = nd.getStringValue() and + nd.isPlatformIndependentConstant() +} + +private predicate mkBoolConst(DataFlow::Node nd, boolean val) { + val = nd.getBoolValue() and + nd.isPlatformIndependentConstant() +} + +private predicate mkFunc(DataFlow::Node nd, Function f) { + nd = f.getARead() and + not f instanceof Method +} + +private predicate analyzableConst(DataFlow::Node e) { + mkNumericConst(e, _) or mkStringConst(e, _) or mkBoolConst(e, _) or mkFunc(e, _) +} + +private predicate analyzableMethodAccess(Read access, DataFlow::Node receiver, Method m) { + exists(IR::ReadInstruction r | r = access.asInstruction() | + r.readsMethod(receiver.asInstruction(), m) and + not r.isConst() + ) +} + +private predicate mkMethodAccess(DataFlow::Node access, GVN qualifier, Method m) { + exists(DataFlow::Node base | + analyzableMethodAccess(access, base, m) and + qualifier = globalValueNumber(base) + ) +} + +private predicate analyzableFieldRead(Read fread, DataFlow::Node base, Field f) { + exists(IR::ReadInstruction r | r = fread.asInstruction() | + r.readsField(base.asInstruction(), f) and + strictcount(mostRecentSideEffect(r)) = 1 and + not r.isConst() + ) +} + +private predicate mkFieldRead( + DataFlow::Node fread, GVN qualifier, Field v, ControlFlow::Node dominator +) { + exists(DataFlow::Node base | + analyzableFieldRead(fread, base, v) and + qualifier = globalValueNumber(base) and + dominator = mostRecentSideEffect(fread.asInstruction()) + ) +} + +private predicate analyzableCall(DataFlow::CallNode 
ce, Function f) { + f = ce.getTarget() and + isPureFn(f) and + not ce.isConst() +} + +private predicate mkPureCall(DataFlow::CallNode ce, Function f, GVN callee, GVNList args) { + analyzableCall(ce, f) and + callee = globalValueNumber(ce.getCalleeNode()) and + args = globalValueNumbers(ce, 0) +} + +/** + * Holds if `v` is a variable whose value changes are not, or at least not fully, captured by SSA. + * + * This is the case for package variables (for which no SSA information exists), but also for + * variables of non-primitive type (for which deep mutations are not captured by SSA). + */ +private predicate incompleteSsa(ValueEntity v) { + not v instanceof Field and + ( + not v instanceof SsaSourceVariable + or + v.(SsaSourceVariable).mayHaveIndirectReferences() + or + exists(Type tp | tp = v.(DeclaredVariable).getType().getUnderlyingType() | + not tp instanceof BasicType + ) + ) +} + +/** + * Holds if `access` is an access to a variable `target` for which SSA information is incomplete. 
+ */ +private predicate analyzableOtherVariable(DataFlow::Node access, ValueEntity target) { + access.asInstruction().reads(target) and + incompleteSsa(target) and + strictcount(mostRecentSideEffect(access.asInstruction())) = 1 and + not access.isConst() and + not target instanceof Function +} + +private predicate mkOtherVariable(DataFlow::Node access, ValueEntity x, ControlFlow::Node dominator) { + analyzableOtherVariable(access, x) and + dominator = mostRecentSideEffect(access.asInstruction()) +} + +private predicate analyzableBinaryOp( + DataFlow::BinaryOperationNode op, string opname, DataFlow::Node lhs, DataFlow::Node rhs +) { + opname = op.getOperator() and + not op.mayHaveSideEffects() and + lhs = op.getLeftOperand() and + rhs = op.getRightOperand() and + not op.isConst() +} + +private predicate mkBinaryOp(DataFlow::Node op, GVN lhs, GVN rhs, string opname) { + exists(DataFlow::Node l, DataFlow::Node r | + analyzableBinaryOp(op, opname, l, r) and + lhs = globalValueNumber(l) and + rhs = globalValueNumber(r) + ) +} + +private predicate analyzableUnaryOp(DataFlow::UnaryOperationNode op) { + not op.mayHaveSideEffects() and + not op.isConst() +} + +private predicate mkUnaryOp(DataFlow::UnaryOperationNode op, GVN child, string opname) { + analyzableUnaryOp(op) and + child = globalValueNumber(op.getOperand()) and + opname = op.getOperator() +} + +private predicate analyzableIndexExpr(DataFlow::ElementReadNode ae) { + strictcount(mostRecentSideEffect(ae.asInstruction())) = 1 and + not ae.isConst() +} + +private predicate mkIndex( + DataFlow::ElementReadNode ae, GVN base, GVN offset, ControlFlow::Node dominator +) { + analyzableIndexExpr(ae) and + base = globalValueNumber(ae.getBase()) and + offset = globalValueNumber(ae.getIndex()) and + dominator = mostRecentSideEffect(ae.asInstruction()) +} + +private predicate analyzablePointerDereferenceExpr(DataFlow::PointerDereferenceNode deref) { + strictcount(mostRecentSideEffect(deref.asInstruction())) = 1 and + not 
deref.isConst() +} + +private predicate mkDeref(DataFlow::PointerDereferenceNode deref, GVN p, ControlFlow::Node dominator) { + analyzablePointerDereferenceExpr(deref) and + p = globalValueNumber(deref.getOperand()) and + dominator = mostRecentSideEffect(deref.asInstruction()) +} + +private predicate ssaInit(SsaExplicitDefinition ssa, DataFlow::Node rhs) { + ssa.getRhs() = rhs.asInstruction() +} + +/** Gets the global value number of data-flow node `nd`. */ +cached +GVN globalValueNumber(DataFlow::Node nd) { + exists(string val | + mkNumericConst(nd, val) and + result = MkNumericConst(val) + ) + or + exists(string val | + mkStringConst(nd, val) and + result = MkStringConst(val) + ) + or + exists(boolean val | + mkBoolConst(nd, val) and + result = MkBoolConst(val) + ) + or + exists(Function f | + mkFunc(nd, f) and + result = MkFunc(f) + ) + or + exists(ValueEntity x, ControlFlow::Node dominator | + mkOtherVariable(nd, x, dominator) and + result = MkOtherVariable(x, dominator) + ) + or + exists(GVN qualifier, Function target | + mkMethodAccess(nd, qualifier, target) and + result = MkMethodAccess(qualifier, target) + ) + or + exists(GVN qualifier, Entity target, ControlFlow::Node dominator | + mkFieldRead(nd, qualifier, target, dominator) and + result = MkFieldRead(qualifier, target, dominator) + ) + or + exists(Function f, GVN callee, GVNList args | + mkPureCall(nd, f, callee, args) and + result = MkPureCall(f, callee, args) + ) + or + exists(GVN lhs, GVN rhs, string opname | + mkBinaryOp(nd, lhs, rhs, opname) and + result = MkBinaryOp(lhs, rhs, opname) + ) + or + exists(GVN child, string opname | + mkUnaryOp(nd, child, opname) and + result = MkUnaryOp(child, opname) + ) + or + exists(GVN x, GVN i, ControlFlow::Node dominator | + mkIndex(nd, x, i, dominator) and + result = MkIndex(x, i, dominator) + ) + or + exists(GVN p, ControlFlow::Node dominator | + mkDeref(nd, p, dominator) and + result = MkDeref(p, dominator) + ) + or + not analyzableExpr(nd) and + result = 
MkUnanalyzable(nd) + or + exists(DataFlow::SsaNode ssa | + nd = ssa.getAUse() and + not incompleteSsa(ssa.getSourceVariable()) and + result = globalValueNumber(ssa) + ) + or + exists(SsaDefinition ssa | ssa = nd.(DataFlow::SsaNode).getDefinition() | + // Local variable with a defining value. + exists(DataFlow::Node init | + ssaInit(ssa, init) and + result = globalValueNumber(init) + ) + or + // Local variable without a defining value. + not ssaInit(ssa, _) and + result = MkIndirectSsa(ssa) + ) +} + +/** + * Holds if the expression is explicitly handled by `globalValueNumber`. + * Unanalyzable expressions still need to be given a global value number, + * but it will be a unique number that is not shared with any other + * expression. + */ +private predicate analyzableExpr(DataFlow::Node e) { + analyzableConst(e) or + any(DataFlow::SsaNode ssa).getAUse() = e or + e instanceof DataFlow::SsaNode or + analyzableOtherVariable(e, _) or + analyzableMethodAccess(e, _, _) or + analyzableFieldRead(e, _, _) or + analyzableCall(e, _) or + analyzableBinaryOp(e, _, _, _) or + analyzableUnaryOp(e) or + analyzableIndexExpr(e) or + analyzablePointerDereferenceExpr(e) +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/Properties.qll b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/Properties.qll new file mode 100644 index 00000000000..ac45742f4a0 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/Properties.qll @@ -0,0 +1,101 @@ +/** + * Provides a class for representing and reasoning about properties of data-flow nodes. + */ + +import go + +private newtype TProperty = + IsBoolean(Boolean b) or + IsNil(Boolean b) + +/** + * A property which may or may not hold of a data-flow node. + * + * Supported properties currently are Boolean truth and `nil`-ness. 
+ */ +class Property extends TProperty { + private predicate checkOnExpr(Expr test, Boolean outcome, DataFlow::Node nd) { + exists(EqualityTestExpr eq, Expr e, boolean isTrue | + eq = test and eq.hasOperands(nd.asExpr(), e) + | + this = IsBoolean(isTrue) and + isTrue = eq.getPolarity().booleanXor(e.getBoolValue().booleanXor(outcome)) + or + this = IsNil(isTrue) and + e = Builtin::nil().getAReference() and + isTrue = eq.getPolarity().booleanXor(outcome).booleanNot() + ) + or + // if test = outcome ==> nd matches this + // then !test = !outcome ==> nd matches this + this.checkOnExpr(test.(NotExpr).getOperand(), outcome.booleanNot(), nd) + or + // if test = outcome ==> nd matches this + // then (test) = outcome ==> nd matches this + this.checkOnExpr(test.(ParenExpr).getExpr(), outcome, nd) + or + // if test = true ==> nd matches this + // then (test && e) = true ==> nd matches this + outcome = true and + this.checkOnExpr(test.(LandExpr).getAnOperand(), outcome, nd) + or + // if test = false ==> nd matches this + // then (test || e) = false ==> nd matches this + outcome = false and + this.checkOnExpr(test.(LorExpr).getAnOperand(), outcome, nd) + or + test = nd.asExpr() and + test instanceof ValueExpr and + test.getType().getUnderlyingType() instanceof BoolType and + this = IsBoolean(outcome) + } + + /** + * Holds if `test` evaluating to `outcome` means that this property holds of `nd`, where `nd` is a + * subexpression of `test`. + */ + predicate checkOn(DataFlow::Node test, Boolean outcome, DataFlow::Node nd) { + checkOnExpr(test.asExpr(), outcome, nd) + } + + /** Holds if this is the property of having the Boolean value `b`. */ + predicate isBoolean(boolean b) { this = IsBoolean(b) } + + /** Returns the boolean represented by this property if it is a boolean. */ + boolean asBoolean() { this = IsBoolean(result) } + + /** Holds if this is the property of being `nil`. */ + predicate isNil() { this = IsNil(true) } + + /** Holds if this is the property of being non-`nil`. 
*/ + predicate isNonNil() { this = IsNil(false) } + + /** Gets a textual representation of this property. */ + string toString() { + exists(boolean b | + this = IsBoolean(b) and + result = "is " + b + ) + or + this = IsNil(true) and + result = "is nil" + or + this = IsNil(false) and + result = "is not nil" + } +} + +/** + * Gets a `Property` representing truth outcome `b`. + */ +Property booleanProperty(boolean b) { result = IsBoolean(b) } + +/** + * Gets a `Property` representing `nil`-ness. + */ +Property nilProperty() { result = IsNil(true) } + +/** + * Gets a `Property` representing non-`nil`-ness. + */ +Property notNilProperty() { result = IsNil(false) } diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/SSA.qll b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/SSA.qll new file mode 100644 index 00000000000..e9e9ce84b2a --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/SSA.qll @@ -0,0 +1,407 @@ +/** + * Provides classes for working with static single assignment form (SSA). + */ + +import go +private import SsaImpl + +/** + * A variable that can be SSA converted, that is, a local variable, but not a variable + * declared in file scope. + */ +class SsaSourceVariable extends LocalVariable { + SsaSourceVariable() { not getScope() instanceof FileScope } + + /** + * Holds if there may be indirect references of this variable that are not covered by `getAReference()`. + * + * This is the case for variables that have their address taken, and for variables whose + * name resolution information may be incomplete (for instance due to an extractor error). 
+ */ + predicate mayHaveIndirectReferences() { + // variables that have their address taken + exists(AddressExpr addr | addr.getOperand().stripParens() = getAReference()) + or + exists(DataFlow::MethodReadNode mrn | + mrn.getReceiver() = getARead() and + mrn.getMethod().getReceiverType() instanceof PointerType + ) + or + // variables where there is an unresolved reference with the same name in the same + // scope or a nested scope, suggesting that name resolution information may be incomplete + exists(FunctionScope scope, FuncDef inner | + scope = this.getScope().(LocalScope).getEnclosingFunctionScope() and + unresolvedReference(getName(), inner) and + inner.getScope().getOuterScope*() = scope + ) + } +} + +/** + * Holds if there is an unresolved reference to `name` in `fn`. + */ +private predicate unresolvedReference(string name, FuncDef fn) { + exists(Ident unresolved | + unresolvedIdentifier(unresolved, name) and + not unresolved = any(SelectorExpr sel).getSelector() and + fn = unresolved.getEnclosingFunction() + ) +} + +/** + * Holds if `id` is an unresolved identifier with the given `name`. + */ +pragma[noinline] +private predicate unresolvedIdentifier(Ident id, string name) { + id.getName() = name and + id instanceof ReferenceExpr and + not id.refersTo(_) +} + +/** + * An SSA variable. + */ +class SsaVariable extends TSsaDefinition { + /** Gets the source variable corresponding to this SSA variable. */ + SsaSourceVariable getSourceVariable() { result = this.(SsaDefinition).getSourceVariable() } + + /** Gets the (unique) definition of this SSA variable. */ + SsaDefinition getDefinition() { result = this } + + /** Gets the type of this SSA variable. */ + Type getType() { result = getSourceVariable().getType() } + + /** Gets a use in basic block `bb` that refers to this SSA variable. 
*/ + IR::Instruction getAUseIn(ReachableBasicBlock bb) { + exists(int i, SsaSourceVariable v | v = getSourceVariable() | + result = bb.getNode(i) and + this = getDefinition(bb, i, v) + ) + } + + /** Gets a use that refers to this SSA variable. */ + IR::Instruction getAUse() { result = getAUseIn(_) } + + /** Gets a textual representation of this element. */ + string toString() { result = getDefinition().prettyPrintRef() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + getDefinition().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** + * An SSA definition. + */ +class SsaDefinition extends TSsaDefinition { + /** Gets the SSA variable defined by this definition. */ + SsaVariable getVariable() { result = this } + + /** Gets the source variable defined by this definition. */ + abstract SsaSourceVariable getSourceVariable(); + + /** + * Gets the basic block to which this definition belongs. + */ + abstract ReachableBasicBlock getBasicBlock(); + + /** + * INTERNAL: Use `getBasicBlock()` and `getSourceVariable()` instead. + * + * Holds if this is a definition of source variable `v` at index `idx` in basic block `bb`. + * + * Phi nodes are considered to be at index `-1`, all other definitions at the index of + * the control flow node they correspond to. + */ + abstract predicate definesAt(ReachableBasicBlock bb, int idx, SsaSourceVariable v); + + /** + * INTERNAL: Use `toString()` instead. + * + * Gets a pretty-printed representation of this SSA definition. + */ + abstract string prettyPrintDef(); + + /** + * INTERNAL: Do not use. 
+ * + * Gets a pretty-printed representation of a reference to this SSA definition. + */ + abstract string prettyPrintRef(); + + /** Gets the innermost function or file to which this SSA definition belongs. */ + ControlFlow::Root getRoot() { result = getBasicBlock().getRoot() } + + /** Gets a textual representation of this element. */ + string toString() { result = prettyPrintDef() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + abstract predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ); +} + +/** + * An SSA definition that corresponds to an explicit assignment or other variable definition. + */ +class SsaExplicitDefinition extends SsaDefinition, TExplicitDef { + /** Gets the instruction where the definition happens. */ + IR::Instruction getInstruction() { + exists(BasicBlock bb, int i | this = TExplicitDef(bb, i, _) | result = bb.getNode(i)) + } + + /** Gets the right-hand side of the definition. 
*/ + IR::Instruction getRhs() { getInstruction().writes(_, result) } + + override predicate definesAt(ReachableBasicBlock bb, int i, SsaSourceVariable v) { + this = TExplicitDef(bb, i, v) + } + + override ReachableBasicBlock getBasicBlock() { definesAt(result, _, _) } + + override SsaSourceVariable getSourceVariable() { this = TExplicitDef(_, _, result) } + + override string prettyPrintRef() { + exists(int l, int c | hasLocationInfo(_, l, c, _, _) | result = "def@" + l + ":" + c) + } + + override string prettyPrintDef() { result = "definition of " + getSourceVariable() } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + getInstruction().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** Provides a helper predicate for working with explicit SSA definitions. */ +module SsaExplicitDefinition { + /** + * Gets the SSA definition corresponding to definition `def`. + */ + SsaExplicitDefinition of(IR::Instruction def) { result.getInstruction() = def } +} + +/** + * An SSA definition that does not correspond to an explicit variable definition. + */ +abstract class SsaImplicitDefinition extends SsaDefinition { + /** + * INTERNAL: Do not use. + * + * Gets the definition kind to include in `prettyPrintRef`. + */ + abstract string getKind(); + + override string prettyPrintRef() { + exists(int l, int c | hasLocationInfo(_, l, c, _, _) | result = getKind() + "@" + l + ":" + c) + } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + endline = startline and + endcolumn = startcolumn and + getBasicBlock().hasLocationInfo(filepath, startline, startcolumn, _, _) + } +} + +/** + * An SSA definition representing the capturing of an SSA-convertible variable + * in the closure of a nested function. 
+ * + * Capturing definitions appear at the beginning of such functions, as well as + * at any function call that may affect the value of the variable. + */ +class SsaVariableCapture extends SsaImplicitDefinition, TCapture { + override predicate definesAt(ReachableBasicBlock bb, int i, SsaSourceVariable v) { + this = TCapture(bb, i, v) + } + + override ReachableBasicBlock getBasicBlock() { definesAt(result, _, _) } + + override SsaSourceVariable getSourceVariable() { definesAt(_, _, result) } + + override string getKind() { result = "capture" } + + override string prettyPrintDef() { result = "capture variable " + getSourceVariable() } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + exists(ReachableBasicBlock bb, int i | definesAt(bb, i, _) | + bb.getNode(i).hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + ) + } +} + +/** + * An SSA definition such as a phi node that has no actual semantics, but simply serves to + * merge or filter data flow. + */ +abstract class SsaPseudoDefinition extends SsaImplicitDefinition { + /** + * Gets an input of this pseudo-definition. + */ + abstract SsaVariable getAnInput(); + + /** + * Gets a textual representation of the inputs of this pseudo-definition + * in lexicographical order. + */ + string ppInputs() { result = concat(getAnInput().getDefinition().prettyPrintRef(), ", ") } +} + +/** + * An SSA phi node, that is, a pseudo-definition for a variable at a point + * in the flow graph where otherwise two or more definitions for the variable + * would be visible. 
+ */ +class SsaPhiNode extends SsaPseudoDefinition, TPhi { + override SsaVariable getAnInput() { + result = getDefReachingEndOf(getBasicBlock().getAPredecessor(), getSourceVariable()) + } + + override predicate definesAt(ReachableBasicBlock bb, int i, SsaSourceVariable v) { + bb = getBasicBlock() and v = getSourceVariable() and i = -1 + } + + override ReachableBasicBlock getBasicBlock() { this = TPhi(result, _) } + + override SsaSourceVariable getSourceVariable() { this = TPhi(_, result) } + + override string getKind() { result = "phi" } + + override string prettyPrintDef() { result = getSourceVariable() + " = phi(" + ppInputs() + ")" } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + endline = startline and + endcolumn = startcolumn and + getBasicBlock().hasLocationInfo(filepath, startline, startcolumn, _, _) + } +} + +/** + * An SSA variable, possibly with a chain of field reads on it. + */ +private newtype TSsaWithFields = + TRoot(SsaVariable v) or + TStep(SsaWithFields base, Field f) { exists(accessPathAux(base, f)) } + +/** + * Gets a representation of instruction `insn` as an ssa-with-fields value if there is one. + */ +private TSsaWithFields accessPath(IR::Instruction insn) { + exists(SsaVariable v | insn = v.getAUse() | result = TRoot(v)) + or + exists(SsaWithFields base, Field f | insn = accessPathAux(base, f) | result = TStep(base, f)) +} + +/** + * Gets a field-read instruction that reads field `f` from an instruction (or from an implicit dereference + * of that instruction's expression) that is represented by ssa-with-fields value `base`. + */ +private IR::Instruction accessPathAux(TSsaWithFields base, Field f) { + exists(IR::FieldReadInstruction fr, IR::Instruction frb | + fr.getBase() = frb or + fr.getBase() = IR::implicitDerefInstruction(frb.(IR::EvalInstruction).getExpr()) + | + base = accessPath(frb) and + f = fr.getField() and + result = fr + ) +} + +/** An SSA variable with zero or more fields read from it.
*/ +class SsaWithFields extends TSsaWithFields { + /** + * Gets the SSA variable corresponding to the base of this SSA variable with fields. + * + * For example, the SSA variable corresponding to `a` for the SSA variable with fields + * corresponding to `a.b`. + */ + SsaVariable getBaseVariable() { + this = TRoot(result) + or + exists(SsaWithFields base, Field f | this = TStep(base, f) | result = base.getBaseVariable()) + } + + /** Gets a use that refers to this SSA variable with fields. */ + DataFlow::Node getAUse() { this = accessPath(result.asInstruction()) } + + /** Gets the type of this SSA variable with fields: the variable's type for a root, or the type of the last field read otherwise. */ + Type getType() { + exists(SsaVariable var | this = TRoot(var) | result = var.getType()) + or + exists(Field f | this = TStep(_, f) | result = f.getType()) + } + + /** Gets a textual representation of this element. */ + string toString() { + exists(SsaVariable var | this = TRoot(var) | result = "(" + var + ")") + or + exists(SsaWithFields base, Field f | this = TStep(base, f) | result = base + "." + f.getName()) + } + + /** + * Gets an SSA-with-fields variable that is similar to this SSA-with-fields variable in the + * sense that it has the same root source variable and the same qualified name (that is, the same sequence of field names). + */ + SsaWithFields similar() { + result.getBaseVariable().getSourceVariable() = this.getBaseVariable().getSourceVariable() and + result.getQualifiedName() = this.getQualifiedName() + } + + /** + * Gets the qualified name of the source variable or variable and fields that this represents. + * + * For example, for an SSA variable that represents the field `a.b`, this would get the string + * `"a.b"`. + */ + string getQualifiedName() { + exists(SsaVariable v | this = TRoot(v) and result = v.getSourceVariable().getName()) + or + exists(SsaWithFields base, Field f | this = TStep(base, f) | + result = base.getQualifiedName() + "." + f.getName() + ) + } + + /** + * Holds if this element is at the specified location.
+ * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.getBaseVariable().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** + * Gets a read similar to `node`, according to the same rules as `SsaWithFields.similar()`. + */ +DataFlow::Node getASimilarReadNode(DataFlow::Node node) { + exists(SsaWithFields readFields | node = readFields.getAUse() | + result = readFields.similar().getAUse() + ) +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/SsaImpl.qll b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/SsaImpl.qll new file mode 100644 index 00000000000..a15e8595b99 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/SsaImpl.qll @@ -0,0 +1,295 @@ +/** + * INTERNAL: Analyses should use module `SSA` instead. + * + * Provides predicates for constructing an SSA representation for functions. + */ + +import go + +cached +private module Internal { + /** Holds if the `i`th node of `bb` defines `v`. */ + cached + predicate defAt(ReachableBasicBlock bb, int i, SsaSourceVariable v) { + bb.getNode(i).(IR::Instruction).writes(v, _) + } + + /** Holds if the `i`th node of `bb` reads `v`. */ + cached + predicate useAt(ReachableBasicBlock bb, int i, SsaSourceVariable v) { + bb.getNode(i).(IR::Instruction).reads(v) + } + + /** + * A data type representing SSA definitions. + * + * We distinguish three kinds of SSA definitions: + * + * 1. Variable definitions, including declarations, assignments and increments/decrements. + * 2. Pseudo-definitions for captured variables at the beginning of the capturing function + * as well as after calls. + * 3. Phi nodes. 
+ * + * SSA definitions are only introduced where necessary. In particular, + * unreachable code has no SSA definitions associated with it, and neither + * have dead assignments (that is, assignments whose value is never read), except that definitions of captured variables are always kept. + */ + cached + newtype TSsaDefinition = + /** + * An SSA definition that corresponds to an explicit assignment or other variable definition. + */ + TExplicitDef(ReachableBasicBlock bb, int i, SsaSourceVariable v) { + defAt(bb, i, v) and + (liveAfterDef(bb, i, v) or v.isCaptured()) + } or + /** + * An SSA definition representing the capturing of an SSA-convertible variable + * in the closure of a nested function. + * + * Capturing definitions appear at the beginning of such functions, as well as + * at any function call that may affect the value of the variable. + */ + TCapture(ReachableBasicBlock bb, int i, SsaSourceVariable v) { + mayCapture(bb, i, v) and + liveAfterDef(bb, i, v) + } or + /** + * An SSA phi node, that is, a pseudo-definition for a variable at a point + * in the flow graph where otherwise two or more definitions for the variable + * would be visible. + */ + TPhi(ReachableJoinBlock bb, SsaSourceVariable v) { + liveAtEntry(bb, v) and + inDefDominanceFrontier(bb, v) + } + + /** + * Holds if `bb` is in the dominance frontier of a block containing a definition of `v`. + */ + pragma[noinline] + private predicate inDefDominanceFrontier(ReachableJoinBlock bb, SsaSourceVariable v) { + exists(ReachableBasicBlock defbb, SsaDefinition def | + def.definesAt(defbb, _, v) and + bb.inDominanceFrontierOf(defbb) + ) + } + + /** + * Holds if `v` is a captured variable which is declared in `declFun` and read in `useFun`. + */ + private predicate readsCapturedVar(FuncDef useFun, SsaSourceVariable v, FuncDef declFun) { + declFun = v.getDeclaringFunction() and + useFun = any(IR::Instruction u | u.reads(v)).getRoot() and + v.isCaptured() + } + + /** Holds if the `i`th node of `bb` in function `f` is an entry node.
*/ + private predicate entryNode(FuncDef f, ReachableBasicBlock bb, int i) { + f = bb.getRoot() and + bb.getNode(i).isEntryNode() + } + + /** + * Holds if the `i`th node of `bb` in function `f` is a function call. + */ + private predicate callNode(FuncDef f, ReachableBasicBlock bb, int i) { + f = bb.getRoot() and + bb.getNode(i).(IR::EvalInstruction).getExpr() instanceof CallExpr + } + + /** + * Holds if the `i`th node of basic block `bb` may induce a pseudo-definition for + * modelling updates to captured variable `v`. Whether the definition is actually + * introduced depends on whether `v` is live at this point in the program. + */ + private predicate mayCapture(ReachableBasicBlock bb, int i, SsaSourceVariable v) { + exists(FuncDef capturingContainer, FuncDef declContainer | + // capture initial value of variable declared in enclosing scope + readsCapturedVar(capturingContainer, v, declContainer) and + capturingContainer != declContainer and + entryNode(capturingContainer, bb, i) + or + // re-capture value of variable after a call if it is assigned non-locally + readsCapturedVar(capturingContainer, v, declContainer) and + assignedThroughClosure(v) and + callNode(capturingContainer, bb, i) + ) + } + + /** A classification of variable references into reads and writes. */ + private newtype RefKind = + ReadRef() or + WriteRef() + + /** + * Holds if the `i`th node of basic block `bb` is a reference to `v`, either a read + * (when `tp` is `ReadRef()`) or a direct or indirect write (when `tp` is `WriteRef()`). + */ + private predicate ref(ReachableBasicBlock bb, int i, SsaSourceVariable v, RefKind tp) { + useAt(bb, i, v) and tp = ReadRef() + or + (mayCapture(bb, i, v) or defAt(bb, i, v)) and + tp = WriteRef() + } + + /** + * Gets the (1-based) rank of the reference to `v` at the `i`th node of basic block `bb`, + * which has the given reference kind `tp`. 
+ */ + private int refRank(ReachableBasicBlock bb, int i, SsaSourceVariable v, RefKind tp) { + i = rank[result](int j | ref(bb, j, v, _)) and + ref(bb, i, v, tp) + } + + /** + * Gets the maximum rank among all references to `v` in basic block `bb`. + */ + private int maxRefRank(ReachableBasicBlock bb, SsaSourceVariable v) { + result = max(refRank(bb, _, v, _)) + } + + /** + * Holds if variable `v` is live after the `i`th node of basic block `bb`, where + * `i` is the index of a node that may assign or capture `v`. + * + * For the purposes of this predicate, function calls are considered as writes of captured variables. + */ + private predicate liveAfterDef(ReachableBasicBlock bb, int i, SsaSourceVariable v) { + exists(int r | r = refRank(bb, i, v, WriteRef()) | + // the next reference to `v` inside `bb` is a read + r + 1 = refRank(bb, _, v, ReadRef()) + or + // this is the last reference to `v` inside `bb`, but `v` is live at entry + // to a successor basic block of `bb` + r = maxRefRank(bb, v) and + liveAtSuccEntry(bb, v) + ) + } + + /** + * Holds if variable `v` is live at the beginning of basic block `bb`. + * + * For the purposes of this predicate, function calls are considered as writes of captured variables. + */ + private predicate liveAtEntry(ReachableBasicBlock bb, SsaSourceVariable v) { + // the first reference to `v` inside `bb` is a read + refRank(bb, _, v, ReadRef()) = 1 + or + // there is no reference to `v` inside `bb`, but `v` is live at entry + // to a successor basic block of `bb` + not exists(refRank(bb, _, v, _)) and + liveAtSuccEntry(bb, v) + } + + /** + * Holds if `v` is live at the beginning of any successor of basic block `bb`. + */ + private predicate liveAtSuccEntry(ReachableBasicBlock bb, SsaSourceVariable v) { + liveAtEntry(bb.getASuccessor(), v) + } + + /** + * Holds if `v` is assigned outside its declaring function. 
+ */ + private predicate assignedThroughClosure(SsaSourceVariable v) { + any(IR::Instruction def | def.writes(v, _)).getRoot() != v.getDeclaringFunction() + } + + /** + * Holds if the `i`th node of `bb` is a use or an SSA definition of variable `v`, with + * `k` indicating whether it is the former or the latter. + */ + private predicate ssaRef(ReachableBasicBlock bb, int i, SsaSourceVariable v, RefKind k) { + useAt(bb, i, v) and k = ReadRef() + or + any(SsaDefinition def).definesAt(bb, i, v) and k = WriteRef() + } + + /** + * Gets the (1-based) rank of the `i`th node of `bb` among all SSA definitions + * and uses of `v` in `bb`, with `k` indicating whether it is a definition or a use. + * + * For example, if `bb` is a basic block with a phi node for `v` (considered + * to be at index -1), uses `v` at node 2 and defines it at node 5, we have: + * + * ``` + * ssaRefRank(bb, -1, v, WriteRef()) = 1 // phi node + * ssaRefRank(bb, 2, v, ReadRef()) = 2 // use at node 2 + * ssaRefRank(bb, 5, v, WriteRef()) = 3 // definition at node 5 + * ``` + */ + private int ssaRefRank(ReachableBasicBlock bb, int i, SsaSourceVariable v, RefKind k) { + i = rank[result](int j | ssaRef(bb, j, v, _)) and + ssaRef(bb, i, v, k) + } + + /** + * Gets the minimum rank of a read in `bb` such that all references to `v` between that + * read and the read at index `i` are reads (and not writes). + */ + private int rewindReads(ReachableBasicBlock bb, int i, SsaSourceVariable v) { + exists(int r | r = ssaRefRank(bb, i, v, ReadRef()) | + exists(int j, RefKind k | r - 1 = ssaRefRank(bb, j, v, k) | + k = ReadRef() and result = rewindReads(bb, j, v) + or + k = WriteRef() and result = r + ) + or + r = 1 and result = r + ) + } + + /** + * Gets the SSA definition of `v` in `bb` that reaches the read of `v` at node `i`, if any. 
+ */ + private SsaDefinition getLocalDefinition(ReachableBasicBlock bb, int i, SsaSourceVariable v) { + exists(int r | r = rewindReads(bb, i, v) | + exists(int j | result.definesAt(bb, j, v) and ssaRefRank(bb, j, v, _) = r - 1) + ) + } + + /** + * Gets an SSA definition of `v` that reaches the end of the immediate dominator of `bb`. + */ + pragma[noinline] + private SsaDefinition getDefReachingEndOfImmediateDominator( + ReachableBasicBlock bb, SsaSourceVariable v + ) { + result = getDefReachingEndOf(bb.getImmediateDominator(), v) + } + + /** + * Gets an SSA definition of `v` that reaches the end of basic block `bb`. + */ + cached + SsaDefinition getDefReachingEndOf(ReachableBasicBlock bb, SsaSourceVariable v) { + exists(int lastRef | lastRef = max(int i | ssaRef(bb, i, v, _)) | + result = getLocalDefinition(bb, lastRef, v) + or + result.definesAt(bb, lastRef, v) and + liveAtSuccEntry(bb, v) + ) + or + // In SSA form, the (unique) reaching definition of a use is the closest + // definition that dominates the use. If two definitions dominate a node + // then one must dominate the other, so we can find the reaching definition + // by following the idominance relation backwards. + result = getDefReachingEndOfImmediateDominator(bb, v) and + not exists(SsaDefinition ssa | ssa.definesAt(bb, _, v)) and + liveAtSuccEntry(bb, v) + } + + /** + * Gets the unique SSA definition of `v` whose value reaches the `i`th node of `bb`, + * which is a use of `v`. 
+ */ + cached + SsaDefinition getDefinition(ReachableBasicBlock bb, int i, SsaSourceVariable v) { + result = getLocalDefinition(bb, i, v) + or + rewindReads(bb, i, v) = 1 and result = getDefReachingEndOf(bb.getImmediateDominator(), v) + } +} + +import Internal diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/TaintTracking.qll b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/TaintTracking.qll new file mode 100644 index 00000000000..096116d4bee --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/TaintTracking.qll @@ -0,0 +1,14 @@ +/** + * Provides classes for performing local (intra-procedural) and + * global (inter-procedural) taint-tracking analyses. + */ + +import semmle.go.dataflow.DataFlow + +/** + * Provides classes for performing local (intra-procedural) and + * global (inter-procedural) taint-tracking analyses. + */ +module TaintTracking { + import semmle.go.dataflow.internal.tainttracking1.TaintTrackingImpl +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/TaintTracking2.qll b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/TaintTracking2.qll new file mode 100644 index 00000000000..6b1b2487e5b --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/TaintTracking2.qll @@ -0,0 +1,12 @@ +/** + * Provides classes for performing local (intra-procedural) and + * global (inter-procedural) taint-tracking analyses. + */ + +/** + * Provides classes for performing local (intra-procedural) and + * global (inter-procedural) taint-tracking analyses. 
+ */ +module TaintTracking2 { + import semmle.go.dataflow.internal.tainttracking2.TaintTrackingImpl +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/barrierguardutil/RedirectCheckBarrierGuard.qll b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/barrierguardutil/RedirectCheckBarrierGuard.qll new file mode 100644 index 00000000000..4876c890612 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/barrierguardutil/RedirectCheckBarrierGuard.qll @@ -0,0 +1,21 @@ +/** + * Provides an implementation of a commonly used barrier guard for sanitizing untrusted URLs. + */ + +import go + +/** + * A call to a function called `isLocalUrl`, `isValidRedirect`, or similar, which is + * considered a barrier guard for sanitizing untrusted URLs. + */ +class RedirectCheckBarrierGuard extends DataFlow::BarrierGuard, DataFlow::CallNode { + RedirectCheckBarrierGuard() { + this.getCalleeName().regexpMatch("(?i)(is_?)?(local_?url|valid_?redir(ect)?)(ur[li])?") + } + + override predicate checks(Expr e, boolean outcome) { + // `isLocalUrl(e)` is a barrier for `e` if it evaluates to `true` + getAnArgument().asExpr() = e and + outcome = true + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/barrierguardutil/RegexpCheck.qll b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/barrierguardutil/RegexpCheck.qll new file mode 100644 index 00000000000..f78428c7abe --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/barrierguardutil/RegexpCheck.qll @@ -0,0 +1,25 @@ +/** + * Provides an implementation of a commonly used barrier guard for sanitizing untrusted URLs. + */ + +import go + +/** + * A call to a regexp match function, considered as a barrier guard for sanitizing untrusted URLs. + * + * This is overapproximate: we do not attempt to reason about the correctness of the regexp. 
+ */ +class RegexpCheck extends DataFlow::BarrierGuard { + RegexpMatchFunction matchfn; + DataFlow::CallNode call; + + RegexpCheck() { + matchfn.getACall() = call and + this = matchfn.getResult().getNode(call).getASuccessor*() + } + + override predicate checks(Expr e, boolean branch) { + e = matchfn.getValue().getNode(call).asExpr() and + (branch = false or branch = true) + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/barrierguardutil/UrlCheck.qll b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/barrierguardutil/UrlCheck.qll new file mode 100644 index 00000000000..8aefc67ee38 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/barrierguardutil/UrlCheck.qll @@ -0,0 +1,32 @@ +/** + * Provides an implementation of a commonly used barrier guard for sanitizing untrusted URLs. + */ + +import go + +/** + * An equality check comparing a data-flow node against a constant string, considered as + * a barrier guard for sanitizing untrusted URLs. + * + * Additionally, a check comparing `url.Hostname()` against a constant string is also + * considered a barrier guard for `url`. 
+ */ +class UrlCheck extends DataFlow::BarrierGuard, DataFlow::EqualityTestNode { + DataFlow::Node url; + + UrlCheck() { + exists(this.getAnOperand().getStringValue()) and + ( + url = this.getAnOperand() + or + exists(DataFlow::MethodCallNode mc | mc = this.getAnOperand() | + mc.getTarget().getName() = "Hostname" and + url = mc.getReceiver() + ) + ) + } + + override predicate checks(Expr e, boolean outcome) { + e = url.asExpr() and outcome = this.getPolarity() + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/DataFlowDispatch.qll b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/DataFlowDispatch.qll new file mode 100644 index 00000000000..cbb3756edd6 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/DataFlowDispatch.qll @@ -0,0 +1,109 @@ +private import go +private import DataFlowPrivate + +/** + * Holds if `call` is an interface call to method `m`, meaning that its receiver `recv` has + * interface type `tp`. + */ +private predicate isInterfaceCallReceiver( + DataFlow::CallNode call, DataFlow::Node recv, InterfaceType tp, string m +) { + call.getReceiver() = recv and + recv.getType().getUnderlyingType() = tp and + m = call.getCalleeName() +} + +/** Gets a data-flow node that may flow into the receiver value of `call`, which is an interface value. */ +private DataFlow::Node getInterfaceCallReceiverSource(DataFlow::CallNode call) { + isInterfaceCallReceiver(call, result.getASuccessor*(), _, _) +} + +/** Gets the type of `nd`, which must be a valid type and not an interface type. */ +private Type getConcreteType(DataFlow::Node nd) { + result = nd.getType() and + not result.getUnderlyingType() instanceof InterfaceType and + not result instanceof InvalidType +} + +/** + * Holds if all concrete (that is, non-interface) types of `nd` can be determined by + * local reasoning.
+ * + * `nd` is restricted to nodes that flow into the receiver value of an interface call, since that is + * all we are ultimately interested in. + */ +private predicate isConcreteValue(DataFlow::Node nd) { + nd = getInterfaceCallReceiverSource(_) and + ( + exists(getConcreteType(nd)) + or + forex(DataFlow::Node pred | pred = nd.getAPredecessor() | isConcreteValue(pred)) + ) +} + +/** + * Holds if `call` is an interface call to method `m` with receiver `recv`, where the concrete + * types of `recv` can be established by local reasoning. + */ +private predicate isConcreteInterfaceCall(DataFlow::Node call, DataFlow::Node recv, string m) { + isInterfaceCallReceiver(call, recv, _, m) and isConcreteValue(recv) +} + +/** + * Gets a function that might be called by `call`, where the receiver of `call` has interface type, + * but its concrete types can be determined by local reasoning. + */ +private FuncDecl getConcreteTarget(DataFlow::CallNode call) { + exists(DataFlow::Node recv, string m | isConcreteInterfaceCall(call, recv, m) | + exists(Type concreteReceiverType, DeclaredFunction concreteTarget | + concreteReceiverType = getConcreteType(getInterfaceCallReceiverSource(call)) and + concreteTarget = concreteReceiverType.getMethod(m) and + result = concreteTarget.getFuncDecl() + ) + ) +} + +/** + * Holds if `call` is a method call whose receiver has an interface type. + */ +private predicate isInterfaceMethodCall(DataFlow::CallNode call) { + isInterfaceCallReceiver(call, _, _, _) +} + +/** + * Gets a method that might be called by `call`, where we restrict the result to methods of + * types that implement the interface type of the receiver of `call`. + */ +private MethodDecl getRestrictedInterfaceTarget(DataFlow::CallNode call) { + exists(InterfaceType tp, Type recvtp, string m | + isInterfaceCallReceiver(call, _, tp, m) and + result = recvtp.getMethod(m).(DeclaredFunction).getFuncDecl() and + recvtp.implements(tp) + ) +} + +/** + * Gets a function that might be called by the call expression `ma`.
+ */ +DataFlowCallable viableCallable(CallExpr ma) { + exists(DataFlow::CallNode call | call.asExpr() = ma | + if isConcreteInterfaceCall(call, _, _) + then result = getConcreteTarget(call) + else + if isInterfaceMethodCall(call) + then result = getRestrictedInterfaceTarget(call) + else result = call.getACallee() + ) +} + +/** + * Holds if the set of viable implementations that can be called by `call` + * might be improved by knowing the call context. + */ +predicate mayBenefitFromCallContext(DataFlowCall call, DataFlowCallable f) { none() } + +/** + * Gets a viable dispatch target of `call` in the context `ctx`. This is + * restricted to those `call`s for which a context might make a difference. + */ +DataFlowCallable viableImplInCallContext(DataFlowCall call, DataFlowCall ctx) { none() } diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/DataFlowImpl.qll b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/DataFlowImpl.qll new file mode 100644 index 00000000000..7693fc72848 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/DataFlowImpl.qll @@ -0,0 +1,4157 @@ +/** + * Provides an implementation of global (interprocedural) data flow. This file + * re-exports the local (intraprocedural) data flow analysis from + * `DataFlowImplSpecific::Public` and adds a global analysis, mainly exposed + * through the `Configuration` class. This file exists in several identical + * copies, allowing queries to use multiple `Configuration` classes that depend + * on each other without introducing mutual recursion among those configurations. + */ + +private import DataFlowImplCommon +private import DataFlowImplSpecific::Private +import DataFlowImplSpecific::Public + +/** + * A configuration of interprocedural data flow analysis. This defines + * sources, sinks, and any other configurable aspect of the analysis. Each + * use of the global data flow library must define its own unique extension + * of this abstract class. 
To create a configuration, extend this class with + * a subclass whose characteristic predicate is a unique singleton string. + * For example, write + * + * ```ql + * class MyAnalysisConfiguration extends DataFlow::Configuration { + * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" } + * // Override `isSource` and `isSink`. + * // Optionally override `isBarrier`. + * // Optionally override `isAdditionalFlowStep`. + * } + * ``` + * Conceptually, this defines a graph where the nodes are `DataFlow::Node`s and + * the edges are those data-flow steps that preserve the value of the node + * along with any additional edges defined by `isAdditionalFlowStep`. + * Specifying nodes in `isBarrier` will remove those nodes from the graph, and + * specifying nodes in `isBarrierIn` and/or `isBarrierOut` will remove in-going + * and/or out-going edges from those nodes, respectively. + * + * Then, to query whether there is flow between some `source` and `sink`, + * write + * + * ```ql + * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink)) + * ``` + * + * Multiple configurations can coexist, but two classes extending + * `DataFlow::Configuration` should never depend on each other. One of them + * should instead depend on a `DataFlow2::Configuration`, a + * `DataFlow3::Configuration`, or a `DataFlow4::Configuration`. + */ +abstract class Configuration extends string { + bindingset[this] + Configuration() { any() } + + /** + * Holds if `source` is a relevant data flow source. + */ + abstract predicate isSource(Node source); + + /** + * Holds if `sink` is a relevant data flow sink. + */ + abstract predicate isSink(Node sink); + + /** + * Holds if data flow through `node` is prohibited. This completely removes + * `node` from the data flow graph. + */ + predicate isBarrier(Node node) { none() } + + /** Holds if data flow into `node` is prohibited. */ + predicate isBarrierIn(Node node) { none() } + + /** Holds if data flow out of `node` is prohibited. 
*/ + predicate isBarrierOut(Node node) { none() } + + /** Holds if data flow through nodes guarded by `guard` is prohibited. */ + predicate isBarrierGuard(BarrierGuard guard) { none() } + + /** + * Holds if the additional flow step from `node1` to `node2` must be taken + * into account in the analysis. + */ + predicate isAdditionalFlowStep(Node node1, Node node2) { none() } + + /** + * Gets the virtual dispatch branching limit when calculating field flow. + * This can be overridden to a smaller value to improve performance (a + * value of 0 disables field flow), or a larger value to get more results. + */ + int fieldFlowBranchLimit() { result = 2 } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + */ + predicate hasFlow(Node source, Node sink) { flowsTo(source, sink, this) } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + * + * The corresponding paths are generated from the end-points and the graph + * included in the module `PathGraph`. + */ + predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowTo(Node sink) { hasFlow(_, sink) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowToExpr(DataFlowExpr sink) { hasFlowTo(exprNode(sink)) } + + /** + * Gets the exploration limit for `hasPartialFlow` and `hasPartialFlowRev` + * measured in approximate number of interprocedural steps. + */ + int explorationLimit() { none() } + + /** + * Holds if there is a partial data flow path from `source` to `node`. The + * approximate distance between `node` and the closest source is `dist` and + * is restricted to be less than or equal to `explorationLimit()`. This + * predicate completely disregards sink definitions. 
+ * + * This predicate is intended for data-flow exploration and debugging and may + * perform poorly if the number of sources is too big and/or the exploration + * limit is set too high without using barriers. + * + * This predicate is disabled (has no results) by default. Override + * `explorationLimit()` with a suitable number to enable this predicate. + * + * To use this in a `path-problem` query, import the module `PartialPathGraph`. + */ + final predicate hasPartialFlow(PartialPathNode source, PartialPathNode node, int dist) { + partialFlow(source, node, this) and + dist = node.getSourceDistance() + } + + /** + * Holds if there is a partial data flow path from `node` to `sink`. The + * approximate distance between `node` and the closest sink is `dist` and + * is restricted to be less than or equal to `explorationLimit()`. This + * predicate completely disregards source definitions. + * + * This predicate is intended for data-flow exploration and debugging and may + * perform poorly if the number of sinks is too big and/or the exploration + * limit is set too high without using barriers. + * + * This predicate is disabled (has no results) by default. Override + * `explorationLimit()` with a suitable number to enable this predicate. + * + * To use this in a `path-problem` query, import the module `PartialPathGraph`. + * + * Note that reverse flow has slightly lower precision than the corresponding + * forward flow, as reverse flow disregards type pruning among other features. + */ + final predicate hasPartialFlowRev(PartialPathNode node, PartialPathNode sink, int dist) { + revPartialFlow(node, sink, this) and + dist = node.getSinkDistance() + } +} + +/** + * This class exists to prevent mutual recursion between the user-overridden + * member predicates of `Configuration` and the rest of the data-flow library. 
+ * Good performance cannot be guaranteed in the presence of such recursion, so + * it should be replaced by using more than one copy of the data flow library. + */ +abstract private class ConfigurationRecursionPrevention extends Configuration { + bindingset[this] + ConfigurationRecursionPrevention() { any() } + + override predicate hasFlow(Node source, Node sink) { + strictcount(Node n | this.isSource(n)) < 0 + or + strictcount(Node n | this.isSink(n)) < 0 + or + strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0 + or + super.hasFlow(source, sink) + } +} + +private predicate inBarrier(Node node, Configuration config) { + config.isBarrierIn(node) and + config.isSource(node) +} + +private predicate outBarrier(Node node, Configuration config) { + config.isBarrierOut(node) and + config.isSink(node) +} + +private predicate fullBarrier(Node node, Configuration config) { + config.isBarrier(node) + or + config.isBarrierIn(node) and + not config.isSource(node) + or + config.isBarrierOut(node) and + not config.isSink(node) + or + exists(BarrierGuard g | + config.isBarrierGuard(g) and + node = g.getAGuardedNode() + ) +} + +/** + * Holds if data can flow in one local step from `node1` to `node2`. + */ +private predicate localFlowStep(Node node1, Node node2, Configuration config) { + simpleLocalFlowStepExt(node1, node2) and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) +} + +/** + * Holds if the additional step from `node1` to `node2` does not jump between callables. 
+ */ +private predicate additionalLocalFlowStep(Node node1, Node node2, Configuration config) { + config.isAdditionalFlowStep(node1, node2) and + getNodeEnclosingCallable(node1) = getNodeEnclosingCallable(node2) and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) +} + +/** + * Holds if data can flow from `node1` to `node2` in a way that discards call contexts. + */ +private predicate jumpStep(Node node1, Node node2, Configuration config) { + jumpStepCached(node1, node2) and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) +} + +/** + * Holds if the additional step from `node1` to `node2` jumps between callables. + */ +private predicate additionalJumpStep(Node node1, Node node2, Configuration config) { + config.isAdditionalFlowStep(node1, node2) and + getNodeEnclosingCallable(node1) != getNodeEnclosingCallable(node2) and + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) +} + +/** + * Holds if field flow should be used for the given configuration. + */ +private predicate useFieldFlow(Configuration config) { config.fieldFlowBranchLimit() >= 1 } + +private module Stage1 { + class ApApprox = Unit; + + class Ap = Unit; + + class ApOption = Unit; + + class Cc = boolean; + + /* Begin: Stage 1 logic. */ + /** + * Holds if `node` is reachable from a source in the configuration `config`. + * + * The Boolean `cc` records whether the node is reached through an + * argument in a call. 
+ */ + predicate fwdFlow(Node node, Cc cc, Configuration config) { + not fullBarrier(node, config) and + ( + config.isSource(node) and + cc = false + or + exists(Node mid | + fwdFlow(mid, cc, config) and + localFlowStep(mid, node, config) + ) + or + exists(Node mid | + fwdFlow(mid, cc, config) and + additionalLocalFlowStep(mid, node, config) + ) + or + exists(Node mid | + fwdFlow(mid, _, config) and + jumpStep(mid, node, config) and + cc = false + ) + or + exists(Node mid | + fwdFlow(mid, _, config) and + additionalJumpStep(mid, node, config) and + cc = false + ) + or + // store + exists(Node mid | + useFieldFlow(config) and + fwdFlow(mid, cc, config) and + store(mid, _, node, _) and + not outBarrier(mid, config) + ) + or + // read + exists(Content c | + fwdFlowRead(c, node, cc, config) and + fwdFlowConsCand(c, config) and + not inBarrier(node, config) + ) + or + // flow into a callable + exists(Node arg | + fwdFlow(arg, _, config) and + viableParamArg(_, node, arg) and + cc = true + ) + or + // flow out of a callable + exists(DataFlowCall call | + fwdFlowOut(call, node, false, config) and + cc = false + or + fwdFlowOutFromArg(call, node, config) and + fwdFlowIsEntered(call, cc, config) + ) + ) + } + + private predicate fwdFlow(Node node, Configuration config) { fwdFlow(node, _, config) } + + pragma[nomagic] + private predicate fwdFlowRead(Content c, Node node, Cc cc, Configuration config) { + exists(Node mid | + fwdFlow(mid, cc, config) and + read(mid, c, node) + ) + } + + /** + * Holds if `c` is the target of a store in the flow covered by `fwdFlow`. 
+ */ + pragma[nomagic] + private predicate fwdFlowConsCand(Content c, Configuration config) { + exists(Node mid, Node node, TypedContent tc | + not fullBarrier(node, config) and + useFieldFlow(config) and + fwdFlow(mid, _, config) and + store(mid, tc, node, _) and + c = tc.getContent() + ) + } + + pragma[nomagic] + private predicate fwdFlowReturnPosition(ReturnPosition pos, Cc cc, Configuration config) { + exists(ReturnNodeExt ret | + fwdFlow(ret, cc, config) and + getReturnPosition(ret) = pos + ) + } + + pragma[nomagic] + private predicate fwdFlowOut(DataFlowCall call, Node out, Cc cc, Configuration config) { + exists(ReturnPosition pos | + fwdFlowReturnPosition(pos, cc, config) and + viableReturnPosOut(call, pos, out) + ) + } + + pragma[nomagic] + private predicate fwdFlowOutFromArg(DataFlowCall call, Node out, Configuration config) { + fwdFlowOut(call, out, true, config) + } + + /** + * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`. + */ + pragma[nomagic] + private predicate fwdFlowIsEntered(DataFlowCall call, Cc cc, Configuration config) { + exists(ArgNode arg | + fwdFlow(arg, cc, config) and + viableParamArg(call, _, arg) + ) + } + + /** + * Holds if `node` is part of a path from a source to a sink in the + * configuration `config`. + * + * The Boolean `toReturn` records whether the node must be returned from + * the enclosing callable in order to reach a sink. 
+ */ + pragma[nomagic] + predicate revFlow(Node node, boolean toReturn, Configuration config) { + revFlow0(node, toReturn, config) and + fwdFlow(node, config) + } + + pragma[nomagic] + private predicate revFlow0(Node node, boolean toReturn, Configuration config) { + fwdFlow(node, config) and + config.isSink(node) and + toReturn = false + or + exists(Node mid | + localFlowStep(node, mid, config) and + revFlow(mid, toReturn, config) + ) + or + exists(Node mid | + additionalLocalFlowStep(node, mid, config) and + revFlow(mid, toReturn, config) + ) + or + exists(Node mid | + jumpStep(node, mid, config) and + revFlow(mid, _, config) and + toReturn = false + ) + or + exists(Node mid | + additionalJumpStep(node, mid, config) and + revFlow(mid, _, config) and + toReturn = false + ) + or + // store + exists(Content c | + revFlowStore(c, node, toReturn, config) and + revFlowConsCand(c, config) + ) + or + // read + exists(Node mid, Content c | + read(node, c, mid) and + fwdFlowConsCand(c, pragma[only_bind_into](config)) and + revFlow(mid, toReturn, pragma[only_bind_into](config)) + ) + or + // flow into a callable + exists(DataFlowCall call | + revFlowIn(call, node, false, config) and + toReturn = false + or + revFlowInToReturn(call, node, config) and + revFlowIsReturned(call, toReturn, config) + ) + or + // flow out of a callable + exists(ReturnPosition pos | + revFlowOut(pos, config) and + getReturnPosition(node) = pos and + toReturn = true + ) + } + + /** + * Holds if `c` is the target of a read in the flow covered by `revFlow`. 
+ */ + pragma[nomagic] + private predicate revFlowConsCand(Content c, Configuration config) { + exists(Node mid, Node node | + fwdFlow(node, pragma[only_bind_into](config)) and + read(node, c, mid) and + fwdFlowConsCand(c, pragma[only_bind_into](config)) and + revFlow(pragma[only_bind_into](mid), _, pragma[only_bind_into](config)) + ) + } + + pragma[nomagic] + private predicate revFlowStore(Content c, Node node, boolean toReturn, Configuration config) { + exists(Node mid, TypedContent tc | + revFlow(mid, toReturn, pragma[only_bind_into](config)) and + fwdFlowConsCand(c, pragma[only_bind_into](config)) and + store(node, tc, mid, _) and + c = tc.getContent() + ) + } + + /** + * Holds if `c` is the target of both a read and a store in the flow covered + * by `revFlow`. + */ + private predicate revFlowIsReadAndStored(Content c, Configuration conf) { + revFlowConsCand(c, conf) and + revFlowStore(c, _, _, conf) + } + + pragma[nomagic] + predicate viableReturnPosOutNodeCandFwd1( + DataFlowCall call, ReturnPosition pos, Node out, Configuration config + ) { + fwdFlowReturnPosition(pos, _, config) and + viableReturnPosOut(call, pos, out) + } + + pragma[nomagic] + private predicate revFlowOut(ReturnPosition pos, Configuration config) { + exists(DataFlowCall call, Node out | + revFlow(out, _, config) and + viableReturnPosOutNodeCandFwd1(call, pos, out, config) + ) + } + + pragma[nomagic] + predicate viableParamArgNodeCandFwd1( + DataFlowCall call, ParamNode p, ArgNode arg, Configuration config + ) { + viableParamArg(call, p, arg) and + fwdFlow(arg, config) + } + + pragma[nomagic] + private predicate revFlowIn(DataFlowCall call, ArgNode arg, boolean toReturn, Configuration config) { + exists(ParamNode p | + revFlow(p, toReturn, config) and + viableParamArgNodeCandFwd1(call, p, arg, config) + ) + } + + pragma[nomagic] + private predicate revFlowInToReturn(DataFlowCall call, ArgNode arg, Configuration config) { + revFlowIn(call, arg, true, config) + } + + /** + * Holds if an 
output from `call` is reached in the flow covered by `revFlow`. + */ + pragma[nomagic] + private predicate revFlowIsReturned(DataFlowCall call, boolean toReturn, Configuration config) { + exists(Node out | + revFlow(out, toReturn, config) and + fwdFlowOutFromArg(call, out, config) + ) + } + + pragma[nomagic] + predicate storeStepCand( + Node node1, Ap ap1, TypedContent tc, Node node2, DataFlowType contentType, Configuration config + ) { + exists(Content c | + revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and + revFlow(node2, pragma[only_bind_into](config)) and + store(node1, tc, node2, contentType) and + c = tc.getContent() and + exists(ap1) + ) + } + + pragma[nomagic] + predicate readStepCand(Node n1, Content c, Node n2, Configuration config) { + revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and + revFlow(n2, pragma[only_bind_into](config)) and + read(n1, c, n2) + } + + pragma[nomagic] + predicate revFlow(Node node, Configuration config) { revFlow(node, _, config) } + + predicate revFlow(Node node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) { + revFlow(node, toReturn, config) and exists(returnAp) and exists(ap) + } + + private predicate throughFlowNodeCand(Node node, Configuration config) { + revFlow(node, true, config) and + fwdFlow(node, true, config) and + not inBarrier(node, config) and + not outBarrier(node, config) + } + + /** Holds if flow may return from `callable`. */ + pragma[nomagic] + private predicate returnFlowCallableNodeCand( + DataFlowCallable callable, ReturnKindExt kind, Configuration config + ) { + exists(ReturnNodeExt ret | + throughFlowNodeCand(ret, config) and + callable = getNodeEnclosingCallable(ret) and + kind = ret.getKind() + ) + } + + /** + * Holds if flow may enter through `p` and reach a return node making `p` a + * candidate for the origin of a summary. 
+ */ + predicate parameterMayFlowThrough(ParamNode p, DataFlowCallable c, Ap ap, Configuration config) { + exists(ReturnKindExt kind | + throughFlowNodeCand(p, config) and + returnFlowCallableNodeCand(c, kind, config) and + getNodeEnclosingCallable(p) = c and + exists(ap) and + // we don't expect a parameter to return stored in itself + not exists(int pos | + kind.(ParamUpdateReturnKind).getPosition() = pos and p.isParameterOf(_, pos) + ) + ) + } + + predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) { + fwd = true and + nodes = count(Node node | fwdFlow(node, config)) and + fields = count(Content f0 | fwdFlowConsCand(f0, config)) and + conscand = -1 and + tuples = count(Node n, boolean b | fwdFlow(n, b, config)) + or + fwd = false and + nodes = count(Node node | revFlow(node, _, config)) and + fields = count(Content f0 | revFlowConsCand(f0, config)) and + conscand = -1 and + tuples = count(Node n, boolean b | revFlow(n, b, config)) + } + /* End: Stage 1 logic. */ +} + +pragma[noinline] +private predicate localFlowStepNodeCand1(Node node1, Node node2, Configuration config) { + Stage1::revFlow(node2, config) and + localFlowStep(node1, node2, config) +} + +pragma[noinline] +private predicate additionalLocalFlowStepNodeCand1(Node node1, Node node2, Configuration config) { + Stage1::revFlow(node2, config) and + additionalLocalFlowStep(node1, node2, config) +} + +pragma[nomagic] +private predicate viableReturnPosOutNodeCand1( + DataFlowCall call, ReturnPosition pos, Node out, Configuration config +) { + Stage1::revFlow(out, config) and + Stage1::viableReturnPosOutNodeCandFwd1(call, pos, out, config) +} + +/** + * Holds if data can flow out of `call` from `ret` to `out`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink. 
+ */ +pragma[nomagic] +private predicate flowOutOfCallNodeCand1( + DataFlowCall call, ReturnNodeExt ret, Node out, Configuration config +) { + viableReturnPosOutNodeCand1(call, getReturnPosition(ret), out, config) and + Stage1::revFlow(ret, config) and + not outBarrier(ret, config) and + not inBarrier(out, config) +} + +pragma[nomagic] +private predicate viableParamArgNodeCand1( + DataFlowCall call, ParamNode p, ArgNode arg, Configuration config +) { + Stage1::viableParamArgNodeCandFwd1(call, p, arg, config) and + Stage1::revFlow(arg, config) +} + +/** + * Holds if data can flow into `call` and that this step is part of a + * path from a source to a sink. + */ +pragma[nomagic] +private predicate flowIntoCallNodeCand1( + DataFlowCall call, ArgNode arg, ParamNode p, Configuration config +) { + viableParamArgNodeCand1(call, p, arg, config) and + Stage1::revFlow(p, config) and + not outBarrier(arg, config) and + not inBarrier(p, config) +} + +/** + * Gets the amount of forward branching on the origin of a cross-call path + * edge in the graph of paths between sources and sinks that ignores call + * contexts. + */ +private int branch(Node n1, Configuration conf) { + result = + strictcount(Node n | + flowOutOfCallNodeCand1(_, n1, n, conf) or flowIntoCallNodeCand1(_, n1, n, conf) + ) +} + +/** + * Gets the amount of backward branching on the target of a cross-call path + * edge in the graph of paths between sources and sinks that ignores call + * contexts. + */ +private int join(Node n2, Configuration conf) { + result = + strictcount(Node n | + flowOutOfCallNodeCand1(_, n, n2, conf) or flowIntoCallNodeCand1(_, n, n2, conf) + ) +} + +/** + * Holds if data can flow out of `call` from `ret` to `out`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink. The + * `allowsFieldFlow` flag indicates whether the branching is within the limit + * specified by the configuration. 
+ */ +pragma[nomagic] +private predicate flowOutOfCallNodeCand1( + DataFlowCall call, ReturnNodeExt ret, Node out, boolean allowsFieldFlow, Configuration config +) { + flowOutOfCallNodeCand1(call, ret, out, config) and + exists(int b, int j | + b = branch(ret, config) and + j = join(out, config) and + if b.minimum(j) <= config.fieldFlowBranchLimit() + then allowsFieldFlow = true + else allowsFieldFlow = false + ) +} + +/** + * Holds if data can flow into `call` and that this step is part of a + * path from a source to a sink. The `allowsFieldFlow` flag indicates whether + * the branching is within the limit specified by the configuration. + */ +pragma[nomagic] +private predicate flowIntoCallNodeCand1( + DataFlowCall call, ArgNode arg, ParamNode p, boolean allowsFieldFlow, Configuration config +) { + flowIntoCallNodeCand1(call, arg, p, config) and + exists(int b, int j | + b = branch(arg, config) and + j = join(p, config) and + if b.minimum(j) <= config.fieldFlowBranchLimit() + then allowsFieldFlow = true + else allowsFieldFlow = false + ) +} + +private module Stage2 { + module PrevStage = Stage1; + + class ApApprox = PrevStage::Ap; + + class Ap = boolean; + + class ApNil extends Ap { + ApNil() { this = false } + } + + bindingset[result, ap] + private ApApprox getApprox(Ap ap) { any() } + + private ApNil getApNil(Node node) { PrevStage::revFlow(node, _) and exists(result) } + + bindingset[tc, tail] + private Ap apCons(TypedContent tc, Ap tail) { result = true and exists(tc) and exists(tail) } + + pragma[inline] + private Content getHeadContent(Ap ap) { exists(result) and ap = true } + + class ApOption = BooleanOption; + + ApOption apNone() { result = TBooleanNone() } + + ApOption apSome(Ap ap) { result = TBooleanSome(ap) } + + class Cc = boolean; + + class CcCall extends Cc { + CcCall() { this = true } + + /** Holds if this call context may be `call`. 
*/ + predicate matchesCall(DataFlowCall call) { any() } + } + + class CcNoCall extends Cc { + CcNoCall() { this = false } + } + + Cc ccNone() { result = false } + + private class LocalCc = Unit; + + bindingset[call, c, outercc] + private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() } + + bindingset[call, c] + private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) { any() } + + bindingset[innercc, inner, call] + private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) { + any() + } + + bindingset[node, cc, config] + private LocalCc getLocalCc(Node node, Cc cc, Configuration config) { any() } + + private predicate localStep( + Node node1, Node node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc + ) { + ( + preservesValue = true and + localFlowStepNodeCand1(node1, node2, config) + or + preservesValue = false and + additionalLocalFlowStepNodeCand1(node1, node2, config) + ) and + exists(ap) and + exists(lcc) + } + + private predicate flowOutOfCall = flowOutOfCallNodeCand1/5; + + private predicate flowIntoCall = flowIntoCallNodeCand1/5; + + bindingset[ap, contentType] + private predicate typecheckStore(Ap ap, DataFlowType contentType) { any() } + + /* Begin: Stage 2 logic. */ + private predicate flowCand(Node node, ApApprox apa, Configuration config) { + PrevStage::revFlow(node, _, _, apa, config) + } + + /** + * Holds if `node` is reachable with access path `ap` from a source in the + * configuration `config`. + * + * The call context `cc` records whether the node is reached through an + * argument in a call, and if so, `argAp` records the access path of that + * argument. 
+ */ + pragma[nomagic] + predicate fwdFlow(Node node, Cc cc, ApOption argAp, Ap ap, Configuration config) { + flowCand(node, _, config) and + config.isSource(node) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + or + exists(Node mid, Ap ap0, LocalCc localCc | + fwdFlow(mid, cc, argAp, ap0, config) and + localCc = getLocalCc(mid, cc, config) + | + localStep(mid, node, true, _, config, localCc) and + ap = ap0 + or + localStep(mid, node, false, ap, config, localCc) and + ap0 instanceof ApNil + ) + or + exists(Node mid | + fwdFlow(mid, _, _, ap, pragma[only_bind_into](config)) and + flowCand(node, _, pragma[only_bind_into](config)) and + jumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() + ) + or + exists(Node mid, ApNil nil | + fwdFlow(mid, _, _, nil, pragma[only_bind_into](config)) and + flowCand(node, _, pragma[only_bind_into](config)) and + additionalJumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + ) + or + // store + exists(TypedContent tc, Ap ap0 | + fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and + ap = apCons(tc, ap0) + ) + or + // read + exists(Ap ap0, Content c | + fwdFlowRead(ap0, c, _, node, cc, argAp, config) and + fwdFlowConsCand(ap0, c, ap, config) + ) + or + // flow into a callable + exists(ApApprox apa | + fwdFlowIn(_, node, _, cc, _, ap, config) and + apa = getApprox(ap) and + if PrevStage::parameterMayFlowThrough(node, _, apa, config) + then argAp = apSome(ap) + else argAp = apNone() + ) + or + // flow out of a callable + exists(DataFlowCall call | + fwdFlowOut(call, node, any(CcNoCall innercc), cc, argAp, ap, config) + or + exists(Ap argAp0 | + fwdFlowOutFromArg(call, node, argAp0, ap, config) and + fwdFlowIsEntered(call, cc, argAp, argAp0, config) + ) + ) + } + + pragma[nomagic] + private predicate fwdFlowStore( + Node node1, Ap ap1, TypedContent tc, Node node2, Cc cc, ApOption argAp, Configuration config + ) { + exists(DataFlowType contentType | + 
fwdFlow(node1, cc, argAp, ap1, config) and + PrevStage::storeStepCand(node1, getApprox(ap1), tc, node2, contentType, config) and + typecheckStore(ap1, contentType) + ) + } + + /** + * Holds if forward flow with access path `tail` reaches a store of `c` + * resulting in access path `cons`. + */ + pragma[nomagic] + private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(TypedContent tc | + fwdFlowStore(_, tail, tc, _, _, _, config) and + tc.getContent() = c and + cons = apCons(tc, tail) + ) + } + + pragma[nomagic] + private predicate fwdFlowRead( + Ap ap, Content c, Node node1, Node node2, Cc cc, ApOption argAp, Configuration config + ) { + fwdFlow(node1, cc, argAp, ap, config) and + PrevStage::readStepCand(node1, c, node2, config) and + getHeadContent(ap) = c + } + + pragma[nomagic] + private predicate fwdFlowIn( + DataFlowCall call, ParamNode p, Cc outercc, Cc innercc, ApOption argAp, Ap ap, + Configuration config + ) { + exists(ArgNode arg, boolean allowsFieldFlow | + fwdFlow(arg, outercc, argAp, ap, config) and + flowIntoCall(call, arg, p, allowsFieldFlow, config) and + innercc = getCallContextCall(call, getNodeEnclosingCallable(p), outercc) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + /** + * Holds if flow may exit from `call` at `out` with access path `ap`. The + * inner call context is `innercc`, but `ccOut` is just the call context + * based on the return step. In the case of through-flow `ccOut` is discarded + * and replaced by the outer call context as tracked by `fwdFlowIsEntered`. 
+ */ + pragma[nomagic] + private predicate fwdFlowOut( + DataFlowCall call, Node out, Cc innercc, Cc ccOut, ApOption argAp, Ap ap, Configuration config + ) { + exists(ReturnNodeExt ret, boolean allowsFieldFlow, DataFlowCallable inner | + fwdFlow(ret, innercc, argAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) and + inner = getNodeEnclosingCallable(ret) and + checkCallContextReturn(innercc, inner, call) and + ccOut = getCallContextReturn(inner, call) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + pragma[nomagic] + private predicate fwdFlowOutFromArg( + DataFlowCall call, Node out, Ap argAp, Ap ap, Configuration config + ) { + fwdFlowOut(call, out, any(CcCall ccc), _, apSome(argAp), ap, config) + } + + /** + * Holds if an argument to `call` is reached in the flow covered by `fwdFlow` + * and data might flow through the target callable and back out at `call`. + */ + pragma[nomagic] + private predicate fwdFlowIsEntered( + DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config + ) { + exists(ParamNode p | + fwdFlowIn(call, p, cc, _, argAp, ap, config) and + PrevStage::parameterMayFlowThrough(p, _, getApprox(ap), config) + ) + } + + pragma[nomagic] + private predicate storeStepFwd( + Node node1, Ap ap1, TypedContent tc, Node node2, Ap ap2, Configuration config + ) { + fwdFlowStore(node1, ap1, tc, node2, _, _, config) and + ap2 = apCons(tc, ap1) and + fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config) + } + + private predicate readStepFwd(Node n1, Ap ap1, Content c, Node n2, Ap ap2, Configuration config) { + fwdFlowRead(ap1, c, n1, n2, _, _, config) and + fwdFlowConsCand(ap1, c, ap2, config) + } + + /** + * Holds if `node` with access path `ap` is part of a path from a source to a + * sink in the configuration `config`. 
+ * + * The Boolean `toReturn` records whether the node must be returned from the + * enclosing callable in order to reach a sink, and if so, `returnAp` records + * the access path of the returned value. + */ + pragma[nomagic] + predicate revFlow(Node node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) { + revFlow0(node, toReturn, returnAp, ap, config) and + fwdFlow(node, _, _, ap, config) + } + + pragma[nomagic] + private predicate revFlow0( + Node node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + fwdFlow(node, _, _, ap, config) and + config.isSink(node) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + or + exists(Node mid | + localStep(node, mid, true, _, config, _) and + revFlow(mid, toReturn, returnAp, ap, config) + ) + or + exists(Node mid, ApNil nil | + fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and + localStep(node, mid, false, _, config, _) and + revFlow(mid, toReturn, returnAp, nil, pragma[only_bind_into](config)) and + ap instanceof ApNil + ) + or + exists(Node mid | + jumpStep(node, mid, config) and + revFlow(mid, _, _, ap, config) and + toReturn = false and + returnAp = apNone() + ) + or + exists(Node mid, ApNil nil | + fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and + additionalJumpStep(node, mid, config) and + revFlow(pragma[only_bind_into](mid), _, _, nil, pragma[only_bind_into](config)) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + ) + or + // store + exists(Ap ap0, Content c | + revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and + revFlowConsCand(ap0, c, ap, config) + ) + or + // read + exists(Node mid, Ap ap0 | + revFlow(mid, toReturn, returnAp, ap0, config) and + readStepFwd(node, ap, _, mid, ap0, config) + ) + or + // flow into a callable + exists(DataFlowCall call | + revFlowIn(call, node, toReturn, returnAp, ap, config) and + toReturn = false + or + exists(Ap returnAp0 | + 
revFlowInToReturn(call, node, returnAp0, ap, config) and + revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + ) + or + // flow out of a callable + revFlowOut(_, node, _, _, ap, config) and + toReturn = true and + if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config) + then returnAp = apSome(ap) + else returnAp = apNone() + } + + pragma[nomagic] + private predicate revFlowStore( + Ap ap0, Content c, Ap ap, Node node, TypedContent tc, Node mid, boolean toReturn, + ApOption returnAp, Configuration config + ) { + revFlow(mid, toReturn, returnAp, ap0, config) and + storeStepFwd(node, ap, tc, mid, ap0, config) and + tc.getContent() = c + } + + /** + * Holds if reverse flow with access path `tail` reaches a read of `c` + * resulting in access path `cons`. + */ + pragma[nomagic] + private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(Node mid, Ap tail0 | + revFlow(mid, _, _, tail, config) and + tail = pragma[only_bind_into](tail0) and + readStepFwd(_, cons, c, mid, tail0, config) + ) + } + + pragma[nomagic] + private predicate revFlowOut( + DataFlowCall call, ReturnNodeExt ret, boolean toReturn, ApOption returnAp, Ap ap, + Configuration config + ) { + exists(Node out, boolean allowsFieldFlow | + revFlow(out, toReturn, returnAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + pragma[nomagic] + private predicate revFlowIn( + DataFlowCall call, ArgNode arg, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + exists(ParamNode p, boolean allowsFieldFlow | + revFlow(p, toReturn, returnAp, ap, config) and + flowIntoCall(call, arg, p, allowsFieldFlow, config) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + pragma[nomagic] + private predicate revFlowInToReturn( + DataFlowCall call, ArgNode arg, Ap returnAp, Ap ap, Configuration config + ) { + revFlowIn(call, arg, true, apSome(returnAp), ap, 
config) + } + + /** + * Holds if an output from `call` is reached in the flow covered by `revFlow` + * and data might flow through the target callable resulting in reverse flow + * reaching an argument of `call`. + */ + pragma[nomagic] + private predicate revFlowIsReturned( + DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + exists(ReturnNodeExt ret, CcCall ccc | + revFlowOut(call, ret, toReturn, returnAp, ap, config) and + fwdFlow(ret, ccc, apSome(_), ap, config) and + ccc.matchesCall(call) + ) + } + + pragma[nomagic] + predicate storeStepCand( + Node node1, Ap ap1, TypedContent tc, Node node2, DataFlowType contentType, Configuration config + ) { + exists(Ap ap2, Content c | + store(node1, tc, node2, contentType) and + revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and + revFlowConsCand(ap2, c, ap1, config) + ) + } + + predicate readStepCand(Node node1, Content c, Node node2, Configuration config) { + exists(Ap ap1, Ap ap2 | + revFlow(node2, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and + readStepFwd(node1, ap1, c, node2, ap2, config) and + revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _, + pragma[only_bind_into](config)) + ) + } + + predicate revFlow(Node node, Configuration config) { revFlow(node, _, _, _, config) } + + private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) { + storeStepFwd(_, ap, tc, _, _, config) + } + + predicate consCand(TypedContent tc, Ap ap, Configuration config) { + storeStepCand(_, ap, tc, _, _, config) + } + + pragma[noinline] + private predicate parameterFlow( + ParamNode p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config + ) { + revFlow(p, true, apSome(ap0), ap, config) and + c = getNodeEnclosingCallable(p) + } + + predicate parameterMayFlowThrough(ParamNode p, DataFlowCallable c, Ap ap, Configuration config) { + exists(ReturnNodeExt ret, Ap ap0, ReturnKindExt kind, int pos | + parameterFlow(p, ap, ap0, c, 
config) and + c = getNodeEnclosingCallable(ret) and + revFlow(ret, true, apSome(_), ap0, config) and + fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and + kind = ret.getKind() and + p.isParameterOf(_, pos) and + // we don't expect a parameter to return stored in itself + not kind.(ParamUpdateReturnKind).getPosition() = pos + ) + } + + predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) { + fwd = true and + nodes = count(Node node | fwdFlow(node, _, _, _, config)) and + fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and + tuples = count(Node n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config)) + or + fwd = false and + nodes = count(Node node | revFlow(node, _, _, _, config)) and + fields = count(TypedContent f0 | consCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and + tuples = count(Node n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config)) + } + /* End: Stage 2 logic. */ +} + +pragma[nomagic] +private predicate flowOutOfCallNodeCand2( + DataFlowCall call, ReturnNodeExt node1, Node node2, boolean allowsFieldFlow, Configuration config +) { + flowOutOfCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and + Stage2::revFlow(node2, pragma[only_bind_into](config)) and + Stage2::revFlow(node1, pragma[only_bind_into](config)) +} + +pragma[nomagic] +private predicate flowIntoCallNodeCand2( + DataFlowCall call, ArgNode node1, ParamNode node2, boolean allowsFieldFlow, Configuration config +) { + flowIntoCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and + Stage2::revFlow(node2, pragma[only_bind_into](config)) and + Stage2::revFlow(node1, pragma[only_bind_into](config)) +} + +private module LocalFlowBigStep { + /** + * A node where some checking is required, and hence the big-step relation + * is not allowed to step over. 
+ */ + private class FlowCheckNode extends Node { + FlowCheckNode() { + /* A checked node is one matched by `castNode`, or by `clearsContentCached` for some content. */ castNode(this) or + clearsContentCached(this, _) + } + } + + /** + * Holds if `node` can be the first node in a maximal subsequence of local + * flow steps in a dataflow path. + */ + predicate localFlowEntry(Node node, Configuration config) { + /* Only nodes retained by `Stage2::revFlow` are considered. */ Stage2::revFlow(node, config) and + ( + /* `node` is a source, the target of a jump step, a `ParamNode`, an `OutNodeExt`, the target of a store or read step, or a `FlowCheckNode`. */ config.isSource(node) or + jumpStep(_, node, config) or + additionalJumpStep(_, node, config) or + node instanceof ParamNode or + node instanceof OutNodeExt or + store(_, _, node, _) or + read(_, _, node) or + node instanceof FlowCheckNode + ) + } + + /** + * Holds if `node` can be the last node in a maximal subsequence of local + * flow steps in a dataflow path. + */ + private predicate localFlowExit(Node node, Configuration config) { + /* `node` steps to some `next` retained by `Stage2::revFlow` via a jump step, a step into or out of a call, a store, or a read. */ exists(Node next | Stage2::revFlow(next, config) | + jumpStep(node, next, config) or + additionalJumpStep(node, next, config) or + flowIntoCallNodeCand1(_, node, next, config) or + flowOutOfCallNodeCand1(_, node, next, config) or + store(node, _, next, _) or + read(node, _, next) + ) + or + /* A `FlowCheckNode` or a sink can also be the last node. */ node instanceof FlowCheckNode + or + config.isSink(node) + } + + /** Holds if `node1` to `node2` is an `additionalLocalFlowStepNodeCand1` step and `Stage2::revFlow` holds for both nodes with access path `false`. NOTE(review): `false` presumably denotes the empty access path in Stage 2; confirm. */ pragma[noinline] + private predicate additionalLocalFlowStepNodeCand2(Node node1, Node node2, Configuration config) { + additionalLocalFlowStepNodeCand1(node1, node2, config) and + Stage2::revFlow(node1, _, _, false, pragma[only_bind_into](config)) and + Stage2::revFlow(node2, _, _, false, pragma[only_bind_into](config)) + } + + /** + * Holds if the local path from `node1` to `node2` is a prefix of a maximal + * subsequence of local flow steps in a dataflow path. + * + * This is the transitive closure of `[additional]localFlowStep` beginning + * at `localFlowEntry`.
+ */ + pragma[nomagic] + private predicate localFlowStepPlus( + Node node1, Node node2, boolean preservesValue, DataFlowType t, Configuration config, + LocalCallContext cc + ) { + /* Exclude `node2` when `cc` is a `LocalCallContextSpecificCall` and `isUnreachableInCallCached` holds for `node2` and that call. */ not isUnreachableInCallCached(node2, cc.(LocalCallContextSpecificCall).getCall()) and + ( + /* Base case: a single step out of a `localFlowEntry` node. */ localFlowEntry(node1, pragma[only_bind_into](config)) and + ( + /* Value-preserving step: `t` is the type of `node1`. */ localFlowStepNodeCand1(node1, node2, config) and + preservesValue = true and + t = getNodeDataFlowType(node1) + or + /* Additional step: not value-preserving, and `t` is the type of `node2`. */ additionalLocalFlowStepNodeCand2(node1, node2, config) and + preservesValue = false and + t = getNodeDataFlowType(node2) + ) and + node1 != node2 and + cc.relevantFor(getNodeEnclosingCallable(node1)) and + not isUnreachableInCallCached(node1, cc.(LocalCallContextSpecificCall).getCall()) and + Stage2::revFlow(node2, pragma[only_bind_into](config)) + or + /* Recursive case: extend a path ending at `mid` by a value-preserving step, keeping `preservesValue` and `t`; `mid` must not be a `FlowCheckNode`. */ exists(Node mid | + localFlowStepPlus(node1, mid, preservesValue, t, pragma[only_bind_into](config), cc) and + localFlowStepNodeCand1(mid, node2, config) and + not mid instanceof FlowCheckNode and + Stage2::revFlow(node2, pragma[only_bind_into](config)) + ) + or + /* Recursive case: extend a path ending at `mid` by an additional step; the result is not value-preserving and `t` becomes the type of `node2`. */ exists(Node mid | + localFlowStepPlus(node1, mid, _, _, pragma[only_bind_into](config), cc) and + additionalLocalFlowStepNodeCand2(mid, node2, config) and + not mid instanceof FlowCheckNode and + preservesValue = false and + t = getNodeDataFlowType(node2) and + Stage2::revFlow(node2, pragma[only_bind_into](config)) + ) + ) + } + + /** + * Holds if `node1` can step to `node2` in one or more local steps and this + * path can occur as a maximal subsequence of local steps in a dataflow path.
+ */ + pragma[nomagic] + predicate localFlowBigStep( + Node node1, Node node2, boolean preservesValue, AccessPathFrontNil apf, Configuration config, + LocalCallContext callContext + ) { + localFlowStepPlus(node1, node2, preservesValue, apf.getType(), config, callContext) and + localFlowExit(node2, config) + } +} + +private import LocalFlowBigStep + +private module Stage3 { + module PrevStage = Stage2; + + class ApApprox = PrevStage::Ap; + + class Ap = AccessPathFront; + + class ApNil = AccessPathFrontNil; + + private ApApprox getApprox(Ap ap) { result = ap.toBoolNonEmpty() } + + private ApNil getApNil(Node node) { + PrevStage::revFlow(node, _) and result = TFrontNil(getNodeDataFlowType(node)) + } + + bindingset[tc, tail] + private Ap apCons(TypedContent tc, Ap tail) { result.getHead() = tc and exists(tail) } + + pragma[noinline] + private Content getHeadContent(Ap ap) { result = ap.getHead().getContent() } + + class ApOption = AccessPathFrontOption; + + ApOption apNone() { result = TAccessPathFrontNone() } + + ApOption apSome(Ap ap) { result = TAccessPathFrontSome(ap) } + + class Cc = boolean; + + class CcCall extends Cc { + CcCall() { this = true } + + /** Holds if this call context may be `call`. 
*/ + predicate matchesCall(DataFlowCall call) { any() } + } + + class CcNoCall extends Cc { + CcNoCall() { this = false } + } + + Cc ccNone() { result = false } + + private class LocalCc = Unit; + + bindingset[call, c, outercc] + private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() } + + bindingset[call, c] + private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) { any() } + + bindingset[innercc, inner, call] + private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) { + any() + } + + bindingset[node, cc, config] + private LocalCc getLocalCc(Node node, Cc cc, Configuration config) { any() } + + private predicate localStep( + Node node1, Node node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc + ) { + localFlowBigStep(node1, node2, preservesValue, ap, config, _) and exists(lcc) + } + + private predicate flowOutOfCall = flowOutOfCallNodeCand2/5; + + private predicate flowIntoCall = flowIntoCallNodeCand2/5; + + bindingset[node, ap] + private predicate filter(Node node, Ap ap) { + not ap.isClearedAt(node) and + if node instanceof CastingNode + then compatibleTypes(getNodeDataFlowType(node), ap.getType()) + else any() + } + + bindingset[ap, contentType] + private predicate typecheckStore(Ap ap, DataFlowType contentType) { + // We need to typecheck stores here, since reverse flow through a getter + // might have a different type here compared to inside the getter. + compatibleTypes(ap.getType(), contentType) + } + + /* Begin: Stage 3 logic. 
*/ + private predicate flowCand(Node node, ApApprox apa, Configuration config) { + PrevStage::revFlow(node, _, _, apa, config) + } + + bindingset[result, apa] + private ApApprox unbindApa(ApApprox apa) { + exists(ApApprox apa0 | + apa = pragma[only_bind_into](apa0) and result = pragma[only_bind_into](apa0) + ) + } + + /** + * Holds if `node` is reachable with access path `ap` from a source in the + * configuration `config`. + * + * The call context `cc` records whether the node is reached through an + * argument in a call, and if so, `argAp` records the access path of that + * argument. + */ + pragma[nomagic] + predicate fwdFlow(Node node, Cc cc, ApOption argAp, Ap ap, Configuration config) { + fwdFlow0(node, cc, argAp, ap, config) and + flowCand(node, unbindApa(getApprox(ap)), config) and + filter(node, ap) + } + + pragma[nomagic] + private predicate fwdFlow0(Node node, Cc cc, ApOption argAp, Ap ap, Configuration config) { + flowCand(node, _, config) and + config.isSource(node) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + or + exists(Node mid, Ap ap0, LocalCc localCc | + fwdFlow(mid, cc, argAp, ap0, config) and + localCc = getLocalCc(mid, cc, config) + | + localStep(mid, node, true, _, config, localCc) and + ap = ap0 + or + localStep(mid, node, false, ap, config, localCc) and + ap0 instanceof ApNil + ) + or + exists(Node mid | + fwdFlow(mid, _, _, ap, pragma[only_bind_into](config)) and + flowCand(node, _, pragma[only_bind_into](config)) and + jumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() + ) + or + exists(Node mid, ApNil nil | + fwdFlow(mid, _, _, nil, pragma[only_bind_into](config)) and + flowCand(node, _, pragma[only_bind_into](config)) and + additionalJumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + ) + or + // store + exists(TypedContent tc, Ap ap0 | + fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and + ap = apCons(tc, ap0) + ) + or + // read + exists(Ap 
ap0, Content c | + fwdFlowRead(ap0, c, _, node, cc, argAp, config) and + fwdFlowConsCand(ap0, c, ap, config) + ) + or + // flow into a callable + exists(ApApprox apa | + fwdFlowIn(_, node, _, cc, _, ap, config) and + apa = getApprox(ap) and + if PrevStage::parameterMayFlowThrough(node, _, apa, config) + then argAp = apSome(ap) + else argAp = apNone() + ) + or + // flow out of a callable + exists(DataFlowCall call | + fwdFlowOut(call, node, any(CcNoCall innercc), cc, argAp, ap, config) + or + exists(Ap argAp0 | + fwdFlowOutFromArg(call, node, argAp0, ap, config) and + fwdFlowIsEntered(call, cc, argAp, argAp0, config) + ) + ) + } + + pragma[nomagic] + private predicate fwdFlowStore( + Node node1, Ap ap1, TypedContent tc, Node node2, Cc cc, ApOption argAp, Configuration config + ) { + exists(DataFlowType contentType | + fwdFlow(node1, cc, argAp, ap1, config) and + PrevStage::storeStepCand(node1, unbindApa(getApprox(ap1)), tc, node2, contentType, config) and + typecheckStore(ap1, contentType) + ) + } + + /** + * Holds if forward flow with access path `tail` reaches a store of `c` + * resulting in access path `cons`. 
+ */ + pragma[nomagic] + private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(TypedContent tc | + fwdFlowStore(_, tail, tc, _, _, _, config) and + tc.getContent() = c and + cons = apCons(tc, tail) + ) + } + + /** Holds if forward flow reaches `node1` with access path `ap` (call context `cc`, argument access path `argAp`), the previous stage has a read step candidate for `c` from `node1` to `node2`, and the head content of `ap` is `c`. */ pragma[nomagic] + private predicate fwdFlowRead( + Ap ap, Content c, Node node1, Node node2, Cc cc, ApOption argAp, Configuration config + ) { + fwdFlow(node1, cc, argAp, ap, config) and + PrevStage::readStepCand(node1, c, node2, config) and + getHeadContent(ap) = c + } + + /** Holds if forward flow reaches some argument of `call` with access path `ap` in call context `outercc`, and that argument flows into parameter `p`; `innercc` is the call context computed by `getCallContextCall` for the callable enclosing `p`. */ pragma[nomagic] + private predicate fwdFlowIn( + DataFlowCall call, ParamNode p, Cc outercc, Cc innercc, ApOption argAp, Ap ap, + Configuration config + ) { + exists(ArgNode arg, boolean allowsFieldFlow | + fwdFlow(arg, outercc, argAp, ap, config) and + flowIntoCall(call, arg, p, allowsFieldFlow, config) and + innercc = getCallContextCall(call, getNodeEnclosingCallable(p), outercc) + | + /* A non-nil access path may only enter the call when the call edge allows field flow. */ ap instanceof ApNil or allowsFieldFlow = true + ) + } + + /** + * Holds if flow may exit from `call` at `out` with access path `ap`. The + * inner call context is `innercc`, but `ccOut` is just the call context + * based on the return step. In the case of through-flow `ccOut` is discarded + * and replaced by the outer call context as tracked by `fwdFlowIsEntered`.
+ */ + pragma[nomagic] + private predicate fwdFlowOut( + DataFlowCall call, Node out, Cc innercc, Cc ccOut, ApOption argAp, Ap ap, Configuration config + ) { + exists(ReturnNodeExt ret, boolean allowsFieldFlow, DataFlowCallable inner | + fwdFlow(ret, innercc, argAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) and + inner = getNodeEnclosingCallable(ret) and + /* `innercc` must be accepted by `checkCallContextReturn` for returning from `inner` via `call`. */ checkCallContextReturn(innercc, inner, call) and + ccOut = getCallContextReturn(inner, call) + | + /* A non-nil access path may only leave the call when the return edge allows field flow. */ ap instanceof ApNil or allowsFieldFlow = true + ) + } + + /** Holds if `fwdFlowOut` holds for `call`, `out` and `ap` with an inner call context that is a `CcCall` and a recorded argument access path `argAp`; the `ccOut` column is ignored. */ pragma[nomagic] + private predicate fwdFlowOutFromArg( + DataFlowCall call, Node out, Ap argAp, Ap ap, Configuration config + ) { + fwdFlowOut(call, out, any(CcCall ccc), _, apSome(argAp), ap, config) + } + + /** + * Holds if an argument to `call` is reached in the flow covered by `fwdFlow` + * and data might flow through the target callable and back out at `call`. + */ + pragma[nomagic] + private predicate fwdFlowIsEntered( + DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config + ) { + exists(ParamNode p | + fwdFlowIn(call, p, cc, _, argAp, ap, config) and + PrevStage::parameterMayFlowThrough(p, _, unbindApa(getApprox(ap)), config) + ) + } + + /** Holds if forward flow takes a store of `tc` from `node1` (access path `ap1`) to `node2`, `ap2` is `apCons(tc, ap1)`, and some `fwdFlowRead` tuple has access path `ap2` and the content of `tc`. */ pragma[nomagic] + private predicate storeStepFwd( + Node node1, Ap ap1, TypedContent tc, Node node2, Ap ap2, Configuration config + ) { + fwdFlowStore(node1, ap1, tc, node2, _, _, config) and + ap2 = apCons(tc, ap1) and + fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config) + } + + /** Holds if forward flow takes a read of `c` from `n1` (access path `ap1`) to `n2`, and `fwdFlowConsCand` relates `ap1` as the cons path to `ap2` as its tail for `c`. */ private predicate readStepFwd(Node n1, Ap ap1, Content c, Node n2, Ap ap2, Configuration config) { + fwdFlowRead(ap1, c, n1, n2, _, _, config) and + fwdFlowConsCand(ap1, c, ap2, config) + } + + /** + * Holds if `node` with access path `ap` is part of a path from a source to a + * sink in the configuration `config`.
+ * + * The Boolean `toReturn` records whether the node must be returned from the + * enclosing callable in order to reach a sink, and if so, `returnAp` records + * the access path of the returned value. + */ + pragma[nomagic] + predicate revFlow(Node node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) { + revFlow0(node, toReturn, returnAp, ap, config) and + fwdFlow(node, _, _, ap, config) + } + + pragma[nomagic] + private predicate revFlow0( + Node node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + fwdFlow(node, _, _, ap, config) and + config.isSink(node) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + or + exists(Node mid | + localStep(node, mid, true, _, config, _) and + revFlow(mid, toReturn, returnAp, ap, config) + ) + or + exists(Node mid, ApNil nil | + fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and + localStep(node, mid, false, _, config, _) and + revFlow(mid, toReturn, returnAp, nil, pragma[only_bind_into](config)) and + ap instanceof ApNil + ) + or + exists(Node mid | + jumpStep(node, mid, config) and + revFlow(mid, _, _, ap, config) and + toReturn = false and + returnAp = apNone() + ) + or + exists(Node mid, ApNil nil | + fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and + additionalJumpStep(node, mid, config) and + revFlow(pragma[only_bind_into](mid), _, _, nil, pragma[only_bind_into](config)) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + ) + or + // store + exists(Ap ap0, Content c | + revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and + revFlowConsCand(ap0, c, ap, config) + ) + or + // read + exists(Node mid, Ap ap0 | + revFlow(mid, toReturn, returnAp, ap0, config) and + readStepFwd(node, ap, _, mid, ap0, config) + ) + or + // flow into a callable + exists(DataFlowCall call | + revFlowIn(call, node, toReturn, returnAp, ap, config) and + toReturn = false + or + exists(Ap returnAp0 | + 
revFlowInToReturn(call, node, returnAp0, ap, config) and + revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + ) + or + // flow out of a callable + revFlowOut(_, node, _, _, ap, config) and + toReturn = true and + if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config) + then returnAp = apSome(ap) + else returnAp = apNone() + } + + pragma[nomagic] + private predicate revFlowStore( + Ap ap0, Content c, Ap ap, Node node, TypedContent tc, Node mid, boolean toReturn, + ApOption returnAp, Configuration config + ) { + revFlow(mid, toReturn, returnAp, ap0, config) and + storeStepFwd(node, ap, tc, mid, ap0, config) and + tc.getContent() = c + } + + /** + * Holds if reverse flow with access path `tail` reaches a read of `c` + * resulting in access path `cons`. + */ + pragma[nomagic] + private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(Node mid, Ap tail0 | + revFlow(mid, _, _, tail, config) and + tail = pragma[only_bind_into](tail0) and + readStepFwd(_, cons, c, mid, tail0, config) + ) + } + + pragma[nomagic] + private predicate revFlowOut( + DataFlowCall call, ReturnNodeExt ret, boolean toReturn, ApOption returnAp, Ap ap, + Configuration config + ) { + exists(Node out, boolean allowsFieldFlow | + revFlow(out, toReturn, returnAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + pragma[nomagic] + private predicate revFlowIn( + DataFlowCall call, ArgNode arg, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + exists(ParamNode p, boolean allowsFieldFlow | + revFlow(p, toReturn, returnAp, ap, config) and + flowIntoCall(call, arg, p, allowsFieldFlow, config) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + pragma[nomagic] + private predicate revFlowInToReturn( + DataFlowCall call, ArgNode arg, Ap returnAp, Ap ap, Configuration config + ) { + revFlowIn(call, arg, true, apSome(returnAp), ap, 
config) + } + + /** + * Holds if an output from `call` is reached in the flow covered by `revFlow` + * and data might flow through the target callable resulting in reverse flow + * reaching an argument of `call`. + */ + pragma[nomagic] + private predicate revFlowIsReturned( + DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + exists(ReturnNodeExt ret, CcCall ccc | + revFlowOut(call, ret, toReturn, returnAp, ap, config) and + fwdFlow(ret, ccc, apSome(_), ap, config) and + ccc.matchesCall(call) + ) + } + + pragma[nomagic] + predicate storeStepCand( + Node node1, Ap ap1, TypedContent tc, Node node2, DataFlowType contentType, Configuration config + ) { + exists(Ap ap2, Content c | + store(node1, tc, node2, contentType) and + revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and + revFlowConsCand(ap2, c, ap1, config) + ) + } + + predicate readStepCand(Node node1, Content c, Node node2, Configuration config) { + exists(Ap ap1, Ap ap2 | + revFlow(node2, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and + readStepFwd(node1, ap1, c, node2, ap2, config) and + revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _, + pragma[only_bind_into](config)) + ) + } + + predicate revFlow(Node node, Configuration config) { revFlow(node, _, _, _, config) } + + private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) { + storeStepFwd(_, ap, tc, _, _, config) + } + + predicate consCand(TypedContent tc, Ap ap, Configuration config) { + storeStepCand(_, ap, tc, _, _, config) + } + + pragma[noinline] + private predicate parameterFlow( + ParamNode p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config + ) { + revFlow(p, true, apSome(ap0), ap, config) and + c = getNodeEnclosingCallable(p) + } + + predicate parameterMayFlowThrough(ParamNode p, DataFlowCallable c, Ap ap, Configuration config) { + exists(ReturnNodeExt ret, Ap ap0, ReturnKindExt kind, int pos | + parameterFlow(p, ap, ap0, c, 
config) and + c = getNodeEnclosingCallable(ret) and + revFlow(ret, true, apSome(_), ap0, config) and + fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and + kind = ret.getKind() and + p.isParameterOf(_, pos) and + // we don't expect a parameter to return stored in itself + not kind.(ParamUpdateReturnKind).getPosition() = pos + ) + } + + predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) { + fwd = true and + nodes = count(Node node | fwdFlow(node, _, _, _, config)) and + fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and + tuples = count(Node n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config)) + or + fwd = false and + nodes = count(Node node | revFlow(node, _, _, _, config)) and + fields = count(TypedContent f0 | consCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and + tuples = count(Node n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config)) + } + /* End: Stage 3 logic. */ +} + +/** + * Holds if `argApf` is recorded as the summary context for flow reaching `node` + * and remains relevant for the following pruning stage. + */ +private predicate flowCandSummaryCtx(Node node, AccessPathFront argApf, Configuration config) { + exists(AccessPathFront apf | + Stage3::revFlow(node, true, _, apf, config) and + Stage3::fwdFlow(node, true, TAccessPathFrontSome(argApf), apf, config) + ) +} + +/** + * Holds if a length 2 access path approximation with the head `tc` is expected + * to be expensive. 
+ */ +private predicate expensiveLen2unfolding(TypedContent tc, Configuration config) { + exists(int tails, int nodes, int apLimit, int tupleLimit | + tails = strictcount(AccessPathFront apf | Stage3::consCand(tc, apf, config)) and + nodes = + strictcount(Node n | + Stage3::revFlow(n, _, _, any(AccessPathFrontHead apf | apf.getHead() = tc), config) + or + flowCandSummaryCtx(n, any(AccessPathFrontHead apf | apf.getHead() = tc), config) + ) and + accessPathApproxCostLimits(apLimit, tupleLimit) and + apLimit < tails and + tupleLimit < (tails - 1) * nodes + ) +} + +private newtype TAccessPathApprox = + TNil(DataFlowType t) or + TConsNil(TypedContent tc, DataFlowType t) { + Stage3::consCand(tc, TFrontNil(t), _) and + not expensiveLen2unfolding(tc, _) + } or + TConsCons(TypedContent tc1, TypedContent tc2, int len) { + Stage3::consCand(tc1, TFrontHead(tc2), _) and + len in [2 .. accessPathLimit()] and + not expensiveLen2unfolding(tc1, _) + } or + TCons1(TypedContent tc, int len) { + len in [1 .. accessPathLimit()] and + expensiveLen2unfolding(tc, _) + } + +/** + * Conceptually a list of `TypedContent`s followed by a `DataFlowType`, but only + * the first two elements of the list and its length are tracked. If data flows + * from a source to a given node with a given `AccessPathApprox`, this indicates + * the sequence of dereference operations needed to get from the value in the node + * to the tracked object. The final type indicates the type of the tracked object. + */ +abstract private class AccessPathApprox extends TAccessPathApprox { + abstract string toString(); + + abstract TypedContent getHead(); + + abstract int len(); + + abstract DataFlowType getType(); + + abstract AccessPathFront getFront(); + + /** Gets the access path obtained by popping `head` from this path, if any. 
*/ + abstract AccessPathApprox pop(TypedContent head); +} + +private class AccessPathApproxNil extends AccessPathApprox, TNil { + private DataFlowType t; + + AccessPathApproxNil() { this = TNil(t) } + + override string toString() { result = concat(": " + ppReprType(t)) } + + override TypedContent getHead() { none() } + + override int len() { result = 0 } + + override DataFlowType getType() { result = t } + + override AccessPathFront getFront() { result = TFrontNil(t) } + + override AccessPathApprox pop(TypedContent head) { none() } +} + +abstract private class AccessPathApproxCons extends AccessPathApprox { } + +private class AccessPathApproxConsNil extends AccessPathApproxCons, TConsNil { + private TypedContent tc; + private DataFlowType t; + + AccessPathApproxConsNil() { this = TConsNil(tc, t) } + + override string toString() { + // The `concat` becomes "" if `ppReprType` has no result. + result = "[" + tc.toString() + "]" + concat(" : " + ppReprType(t)) + } + + override TypedContent getHead() { result = tc } + + override int len() { result = 1 } + + override DataFlowType getType() { result = tc.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc) } + + override AccessPathApprox pop(TypedContent head) { head = tc and result = TNil(t) } +} + +private class AccessPathApproxConsCons extends AccessPathApproxCons, TConsCons { + private TypedContent tc1; + private TypedContent tc2; + private int len; + + AccessPathApproxConsCons() { this = TConsCons(tc1, tc2, len) } + + override string toString() { + if len = 2 + then result = "[" + tc1.toString() + ", " + tc2.toString() + "]" + else result = "[" + tc1.toString() + ", " + tc2.toString() + ", ... 
(" + len.toString() + ")]" + } + + override TypedContent getHead() { result = tc1 } + + override int len() { result = len } + + override DataFlowType getType() { result = tc1.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc1) } + + override AccessPathApprox pop(TypedContent head) { + head = tc1 and + ( + result = TConsCons(tc2, _, len - 1) + or + len = 2 and + result = TConsNil(tc2, _) + or + result = TCons1(tc2, len - 1) + ) + } +} + +private class AccessPathApproxCons1 extends AccessPathApproxCons, TCons1 { + private TypedContent tc; + private int len; + + AccessPathApproxCons1() { this = TCons1(tc, len) } + + override string toString() { + if len = 1 + then result = "[" + tc.toString() + "]" + else result = "[" + tc.toString() + ", ... (" + len.toString() + ")]" + } + + override TypedContent getHead() { result = tc } + + override int len() { result = len } + + override DataFlowType getType() { result = tc.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc) } + + override AccessPathApprox pop(TypedContent head) { + head = tc and + ( + exists(TypedContent tc2 | Stage3::consCand(tc, TFrontHead(tc2), _) | + result = TConsCons(tc2, _, len - 1) + or + len = 2 and + result = TConsNil(tc2, _) + or + result = TCons1(tc2, len - 1) + ) + or + exists(DataFlowType t | + len = 1 and + Stage3::consCand(tc, TFrontNil(t), _) and + result = TNil(t) + ) + ) + } +} + +/** Gets the access path obtained by popping `tc` from `ap`, if any. */ +private AccessPathApprox pop(TypedContent tc, AccessPathApprox apa) { result = apa.pop(tc) } + +/** Gets the access path obtained by pushing `tc` onto `ap`. 
*/ +private AccessPathApprox push(TypedContent tc, AccessPathApprox apa) { apa = pop(tc, result) } + +private newtype TAccessPathApproxOption = + TAccessPathApproxNone() or + TAccessPathApproxSome(AccessPathApprox apa) + +private class AccessPathApproxOption extends TAccessPathApproxOption { + string toString() { + this = TAccessPathApproxNone() and result = "" + or + this = TAccessPathApproxSome(any(AccessPathApprox apa | result = apa.toString())) + } +} + +private module Stage4 { + module PrevStage = Stage3; + + class ApApprox = PrevStage::Ap; + + class Ap = AccessPathApprox; + + class ApNil = AccessPathApproxNil; + + private ApApprox getApprox(Ap ap) { result = ap.getFront() } + + private ApNil getApNil(Node node) { + PrevStage::revFlow(node, _) and result = TNil(getNodeDataFlowType(node)) + } + + bindingset[tc, tail] + private Ap apCons(TypedContent tc, Ap tail) { result = push(tc, tail) } + + pragma[noinline] + private Content getHeadContent(Ap ap) { result = ap.getHead().getContent() } + + class ApOption = AccessPathApproxOption; + + ApOption apNone() { result = TAccessPathApproxNone() } + + ApOption apSome(Ap ap) { result = TAccessPathApproxSome(ap) } + + class Cc = CallContext; + + class CcCall = CallContextCall; + + class CcNoCall = CallContextNoCall; + + Cc ccNone() { result instanceof CallContextAny } + + private class LocalCc = LocalCallContext; + + bindingset[call, c, outercc] + private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { + c = resolveCall(call, outercc) and + if recordDataFlowCallSite(call, c) then result = TSpecificCall(call) else result = TSomeCall() + } + + bindingset[call, c] + private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) { + if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone() + } + + bindingset[innercc, inner, call] + private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) { + 
resolveReturn(innercc, inner, call) + or + innercc.(CallContextCall).matchesCall(call) + } + + bindingset[node, cc, config] + private LocalCc getLocalCc(Node node, Cc cc, Configuration config) { + localFlowEntry(node, config) and + result = getLocalCallContext(pragma[only_bind_out](cc), getNodeEnclosingCallable(node)) + } + + private predicate localStep( + Node node1, Node node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc + ) { + localFlowBigStep(node1, node2, preservesValue, ap.getFront(), config, lcc) + } + + pragma[nomagic] + private predicate flowOutOfCall( + DataFlowCall call, ReturnNodeExt node1, Node node2, boolean allowsFieldFlow, + Configuration config + ) { + flowOutOfCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and + PrevStage::revFlow(node2, _, _, _, pragma[only_bind_into](config)) and + PrevStage::revFlow(node1, _, _, _, pragma[only_bind_into](config)) + } + + pragma[nomagic] + private predicate flowIntoCall( + DataFlowCall call, ArgNode node1, ParamNode node2, boolean allowsFieldFlow, Configuration config + ) { + flowIntoCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and + PrevStage::revFlow(node2, _, _, _, pragma[only_bind_into](config)) and + PrevStage::revFlow(node1, _, _, _, pragma[only_bind_into](config)) + } + + bindingset[node, ap] + private predicate filter(Node node, Ap ap) { any() } + + // Type checking is not necessary here as it has already been done in stage 3. + bindingset[ap, contentType] + private predicate typecheckStore(Ap ap, DataFlowType contentType) { any() } + + /* Begin: Stage 4 logic. 
*/ + private predicate flowCand(Node node, ApApprox apa, Configuration config) { + PrevStage::revFlow(node, _, _, apa, config) + } + + bindingset[result, apa] + private ApApprox unbindApa(ApApprox apa) { + exists(ApApprox apa0 | + apa = pragma[only_bind_into](apa0) and result = pragma[only_bind_into](apa0) + ) + } + + /** + * Holds if `node` is reachable with access path `ap` from a source in the + * configuration `config`. + * + * The call context `cc` records whether the node is reached through an + * argument in a call, and if so, `argAp` records the access path of that + * argument. + */ + pragma[nomagic] + predicate fwdFlow(Node node, Cc cc, ApOption argAp, Ap ap, Configuration config) { + fwdFlow0(node, cc, argAp, ap, config) and + flowCand(node, unbindApa(getApprox(ap)), config) and + filter(node, ap) + } + + pragma[nomagic] + private predicate fwdFlow0(Node node, Cc cc, ApOption argAp, Ap ap, Configuration config) { + flowCand(node, _, config) and + config.isSource(node) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + or + exists(Node mid, Ap ap0, LocalCc localCc | + fwdFlow(mid, cc, argAp, ap0, config) and + localCc = getLocalCc(mid, cc, config) + | + localStep(mid, node, true, _, config, localCc) and + ap = ap0 + or + localStep(mid, node, false, ap, config, localCc) and + ap0 instanceof ApNil + ) + or + exists(Node mid | + fwdFlow(mid, _, _, ap, pragma[only_bind_into](config)) and + flowCand(node, _, pragma[only_bind_into](config)) and + jumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() + ) + or + exists(Node mid, ApNil nil | + fwdFlow(mid, _, _, nil, pragma[only_bind_into](config)) and + flowCand(node, _, pragma[only_bind_into](config)) and + additionalJumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + ) + or + // store + exists(TypedContent tc, Ap ap0 | + fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and + ap = apCons(tc, ap0) + ) + or + // read + exists(Ap 
ap0, Content c | + fwdFlowRead(ap0, c, _, node, cc, argAp, config) and + fwdFlowConsCand(ap0, c, ap, config) + ) + or + // flow into a callable + exists(ApApprox apa | + fwdFlowIn(_, node, _, cc, _, ap, config) and + apa = getApprox(ap) and + if PrevStage::parameterMayFlowThrough(node, _, apa, config) + then argAp = apSome(ap) + else argAp = apNone() + ) + or + // flow out of a callable + exists(DataFlowCall call | + fwdFlowOut(call, node, any(CcNoCall innercc), cc, argAp, ap, config) + or + exists(Ap argAp0 | + fwdFlowOutFromArg(call, node, argAp0, ap, config) and + fwdFlowIsEntered(call, cc, argAp, argAp0, config) + ) + ) + } + + pragma[nomagic] + private predicate fwdFlowStore( + Node node1, Ap ap1, TypedContent tc, Node node2, Cc cc, ApOption argAp, Configuration config + ) { + exists(DataFlowType contentType | + fwdFlow(node1, cc, argAp, ap1, config) and + PrevStage::storeStepCand(node1, unbindApa(getApprox(ap1)), tc, node2, contentType, config) and + typecheckStore(ap1, contentType) + ) + } + + /** + * Holds if forward flow with access path `tail` reaches a store of `c` + * resulting in access path `cons`. 
+ */ + pragma[nomagic] + private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(TypedContent tc | + fwdFlowStore(_, tail, tc, _, _, _, config) and + tc.getContent() = c and + cons = apCons(tc, tail) + ) + } + + pragma[nomagic] + private predicate fwdFlowRead( + Ap ap, Content c, Node node1, Node node2, Cc cc, ApOption argAp, Configuration config + ) { + fwdFlow(node1, cc, argAp, ap, config) and + PrevStage::readStepCand(node1, c, node2, config) and + getHeadContent(ap) = c + } + + pragma[nomagic] + private predicate fwdFlowIn( + DataFlowCall call, ParamNode p, Cc outercc, Cc innercc, ApOption argAp, Ap ap, + Configuration config + ) { + exists(ArgNode arg, boolean allowsFieldFlow | + fwdFlow(arg, outercc, argAp, ap, config) and + flowIntoCall(call, arg, p, allowsFieldFlow, config) and + innercc = getCallContextCall(call, getNodeEnclosingCallable(p), outercc) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + /** + * Holds if flow may exit from `call` at `out` with access path `ap`. The + * inner call context is `innercc`, but `ccOut` is just the call context + * based on the return step. In the case of through-flow `ccOut` is discarded + * and replaced by the outer call context as tracked by `fwdFlowIsEntered`. 
+ */ + pragma[nomagic] + private predicate fwdFlowOut( + DataFlowCall call, Node out, Cc innercc, Cc ccOut, ApOption argAp, Ap ap, Configuration config + ) { + exists(ReturnNodeExt ret, boolean allowsFieldFlow, DataFlowCallable inner | + fwdFlow(ret, innercc, argAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) and + inner = getNodeEnclosingCallable(ret) and + checkCallContextReturn(innercc, inner, call) and + ccOut = getCallContextReturn(inner, call) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + pragma[nomagic] + private predicate fwdFlowOutFromArg( + DataFlowCall call, Node out, Ap argAp, Ap ap, Configuration config + ) { + fwdFlowOut(call, out, any(CcCall ccc), _, apSome(argAp), ap, config) + } + + /** + * Holds if an argument to `call` is reached in the flow covered by `fwdFlow` + * and data might flow through the target callable and back out at `call`. + */ + pragma[nomagic] + private predicate fwdFlowIsEntered( + DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config + ) { + exists(ParamNode p | + fwdFlowIn(call, p, cc, _, argAp, ap, config) and + PrevStage::parameterMayFlowThrough(p, _, unbindApa(getApprox(ap)), config) + ) + } + + pragma[nomagic] + private predicate storeStepFwd( + Node node1, Ap ap1, TypedContent tc, Node node2, Ap ap2, Configuration config + ) { + fwdFlowStore(node1, ap1, tc, node2, _, _, config) and + ap2 = apCons(tc, ap1) and + fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config) + } + + private predicate readStepFwd(Node n1, Ap ap1, Content c, Node n2, Ap ap2, Configuration config) { + fwdFlowRead(ap1, c, n1, n2, _, _, config) and + fwdFlowConsCand(ap1, c, ap2, config) + } + + /** + * Holds if `node` with access path `ap` is part of a path from a source to a + * sink in the configuration `config`. 
+ * + * The Boolean `toReturn` records whether the node must be returned from the + * enclosing callable in order to reach a sink, and if so, `returnAp` records + * the access path of the returned value. + */ + pragma[nomagic] + predicate revFlow(Node node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) { + revFlow0(node, toReturn, returnAp, ap, config) and + fwdFlow(node, _, _, ap, config) + } + + pragma[nomagic] + private predicate revFlow0( + Node node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + fwdFlow(node, _, _, ap, config) and + config.isSink(node) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + or + exists(Node mid | + localStep(node, mid, true, _, config, _) and + revFlow(mid, toReturn, returnAp, ap, config) + ) + or + exists(Node mid, ApNil nil | + fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and + localStep(node, mid, false, _, config, _) and + revFlow(mid, toReturn, returnAp, nil, pragma[only_bind_into](config)) and + ap instanceof ApNil + ) + or + exists(Node mid | + jumpStep(node, mid, config) and + revFlow(mid, _, _, ap, config) and + toReturn = false and + returnAp = apNone() + ) + or + exists(Node mid, ApNil nil | + fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and + additionalJumpStep(node, mid, config) and + revFlow(pragma[only_bind_into](mid), _, _, nil, pragma[only_bind_into](config)) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + ) + or + // store + exists(Ap ap0, Content c | + revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and + revFlowConsCand(ap0, c, ap, config) + ) + or + // read + exists(Node mid, Ap ap0 | + revFlow(mid, toReturn, returnAp, ap0, config) and + readStepFwd(node, ap, _, mid, ap0, config) + ) + or + // flow into a callable + exists(DataFlowCall call | + revFlowIn(call, node, toReturn, returnAp, ap, config) and + toReturn = false + or + exists(Ap returnAp0 | + 
revFlowInToReturn(call, node, returnAp0, ap, config) and + revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + ) + or + // flow out of a callable + revFlowOut(_, node, _, _, ap, config) and + toReturn = true and + if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config) + then returnAp = apSome(ap) + else returnAp = apNone() + } + + pragma[nomagic] + private predicate revFlowStore( + Ap ap0, Content c, Ap ap, Node node, TypedContent tc, Node mid, boolean toReturn, + ApOption returnAp, Configuration config + ) { + revFlow(mid, toReturn, returnAp, ap0, config) and + storeStepFwd(node, ap, tc, mid, ap0, config) and + tc.getContent() = c + } + + /** + * Holds if reverse flow with access path `tail` reaches a read of `c` + * resulting in access path `cons`. + */ + pragma[nomagic] + private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(Node mid, Ap tail0 | + revFlow(mid, _, _, tail, config) and + tail = pragma[only_bind_into](tail0) and + readStepFwd(_, cons, c, mid, tail0, config) + ) + } + + pragma[nomagic] + private predicate revFlowOut( + DataFlowCall call, ReturnNodeExt ret, boolean toReturn, ApOption returnAp, Ap ap, + Configuration config + ) { + exists(Node out, boolean allowsFieldFlow | + revFlow(out, toReturn, returnAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + pragma[nomagic] + private predicate revFlowIn( + DataFlowCall call, ArgNode arg, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + exists(ParamNode p, boolean allowsFieldFlow | + revFlow(p, toReturn, returnAp, ap, config) and + flowIntoCall(call, arg, p, allowsFieldFlow, config) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + pragma[nomagic] + private predicate revFlowInToReturn( + DataFlowCall call, ArgNode arg, Ap returnAp, Ap ap, Configuration config + ) { + revFlowIn(call, arg, true, apSome(returnAp), ap, 
config) + } + + /** + * Holds if an output from `call` is reached in the flow covered by `revFlow` + * and data might flow through the target callable resulting in reverse flow + * reaching an argument of `call`. + */ + pragma[nomagic] + private predicate revFlowIsReturned( + DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + exists(ReturnNodeExt ret, CcCall ccc | + revFlowOut(call, ret, toReturn, returnAp, ap, config) and + fwdFlow(ret, ccc, apSome(_), ap, config) and + ccc.matchesCall(call) + ) + } + + pragma[nomagic] + predicate storeStepCand( + Node node1, Ap ap1, TypedContent tc, Node node2, DataFlowType contentType, Configuration config + ) { + exists(Ap ap2, Content c | + store(node1, tc, node2, contentType) and + revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and + revFlowConsCand(ap2, c, ap1, config) + ) + } + + predicate readStepCand(Node node1, Content c, Node node2, Configuration config) { + exists(Ap ap1, Ap ap2 | + revFlow(node2, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and + readStepFwd(node1, ap1, c, node2, ap2, config) and + revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _, + pragma[only_bind_into](config)) + ) + } + + predicate revFlow(Node node, Configuration config) { revFlow(node, _, _, _, config) } + + private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) { + storeStepFwd(_, ap, tc, _, _, config) + } + + predicate consCand(TypedContent tc, Ap ap, Configuration config) { + storeStepCand(_, ap, tc, _, _, config) + } + + pragma[noinline] + private predicate parameterFlow( + ParamNode p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config + ) { + revFlow(p, true, apSome(ap0), ap, config) and + c = getNodeEnclosingCallable(p) + } + + predicate parameterMayFlowThrough(ParamNode p, DataFlowCallable c, Ap ap, Configuration config) { + exists(ReturnNodeExt ret, Ap ap0, ReturnKindExt kind, int pos | + parameterFlow(p, ap, ap0, c, 
config) and + c = getNodeEnclosingCallable(ret) and + revFlow(ret, true, apSome(_), ap0, config) and + fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and + kind = ret.getKind() and + p.isParameterOf(_, pos) and + // we don't expect a parameter to return stored in itself + not kind.(ParamUpdateReturnKind).getPosition() = pos + ) + } + + predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) { + fwd = true and + nodes = count(Node node | fwdFlow(node, _, _, _, config)) and + fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and + tuples = count(Node n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config)) + or + fwd = false and + nodes = count(Node node | revFlow(node, _, _, _, config)) and + fields = count(TypedContent f0 | consCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and + tuples = count(Node n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config)) + } + /* End: Stage 4 logic. */ +} + +bindingset[conf, result] +private Configuration unbindConf(Configuration conf) { + exists(Configuration c | result = pragma[only_bind_into](c) and conf = pragma[only_bind_into](c)) +} + +private predicate nodeMayUseSummary(Node n, AccessPathApprox apa, Configuration config) { + exists(DataFlowCallable c, AccessPathApprox apa0 | + Stage4::parameterMayFlowThrough(_, c, apa, _) and + Stage4::revFlow(n, true, _, apa0, config) and + Stage4::fwdFlow(n, any(CallContextCall ccc), TAccessPathApproxSome(apa), apa0, config) and + getNodeEnclosingCallable(n) = c + ) +} + +private newtype TSummaryCtx = + TSummaryCtxNone() or + TSummaryCtxSome(ParamNode p, AccessPath ap) { + Stage4::parameterMayFlowThrough(p, _, ap.getApprox(), _) + } + +/** + * A context for generating flow summaries. 
This represents flow entry through + * a specific parameter with an access path of a specific shape. + * + * Summaries are only created for parameters that may flow through. + */ +abstract private class SummaryCtx extends TSummaryCtx { + abstract string toString(); +} + +/** A summary context from which no flow summary can be generated. */ +private class SummaryCtxNone extends SummaryCtx, TSummaryCtxNone { + override string toString() { result = "" } +} + +/** A summary context from which a flow summary can be generated. */ +private class SummaryCtxSome extends SummaryCtx, TSummaryCtxSome { + private ParamNode p; + private AccessPath ap; + + SummaryCtxSome() { this = TSummaryCtxSome(p, ap) } + + int getParameterPos() { p.isParameterOf(_, result) } + + override string toString() { result = p + ": " + ap } + + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + p.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** + * Gets the number of length 2 access path approximations that correspond to `apa`. + */ +private int count1to2unfold(AccessPathApproxCons1 apa, Configuration config) { + exists(TypedContent tc, int len | + tc = apa.getHead() and + len = apa.len() and + result = + strictcount(AccessPathFront apf | + Stage4::consCand(tc, any(AccessPathApprox ap | ap.getFront() = apf and ap.len() = len - 1), + config) + ) + ) +} + +private int countNodesUsingAccessPath(AccessPathApprox apa, Configuration config) { + result = + strictcount(Node n | Stage4::revFlow(n, _, _, apa, config) or nodeMayUseSummary(n, apa, config)) +} + +/** + * Holds if a length 2 access path approximation matching `apa` is expected + * to be expensive. 
+ */ +private predicate expensiveLen1to2unfolding(AccessPathApproxCons1 apa, Configuration config) { + exists(int aps, int nodes, int apLimit, int tupleLimit | + aps = count1to2unfold(apa, config) and + nodes = countNodesUsingAccessPath(apa, config) and + accessPathCostLimits(apLimit, tupleLimit) and + apLimit < aps and + tupleLimit < (aps - 1) * nodes + ) +} + +private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) { + exists(TypedContent head | + apa.pop(head) = result and + Stage4::consCand(head, result, config) + ) +} + +/** + * Holds with `unfold = false` if a precise head-tail representation of `apa` is + * expected to be expensive. Holds with `unfold = true` otherwise. + */ +private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) { + exists(int aps, int nodes, int apLimit, int tupleLimit | + aps = countPotentialAps(apa, config) and + nodes = countNodesUsingAccessPath(apa, config) and + accessPathCostLimits(apLimit, tupleLimit) and + if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true + ) +} + +/** + * Gets the number of `AccessPath`s that correspond to `apa`. + */ +private int countAps(AccessPathApprox apa, Configuration config) { + evalUnfold(apa, false, config) and + result = 1 and + (not apa instanceof AccessPathApproxCons1 or expensiveLen1to2unfolding(apa, config)) + or + evalUnfold(apa, false, config) and + result = count1to2unfold(apa, config) and + not expensiveLen1to2unfolding(apa, config) + or + evalUnfold(apa, true, config) and + result = countPotentialAps(apa, config) +} + +/** + * Gets the number of `AccessPath`s that would correspond to `apa` assuming + * that it is expanded to a precise head-tail representation. 
+ */ +language[monotonicAggregates] +private int countPotentialAps(AccessPathApprox apa, Configuration config) { + apa instanceof AccessPathApproxNil and result = 1 + or + result = strictsum(AccessPathApprox tail | tail = getATail(apa, config) | countAps(tail, config)) +} + +private newtype TAccessPath = + TAccessPathNil(DataFlowType t) or + TAccessPathCons(TypedContent head, AccessPath tail) { + exists(AccessPathApproxCons apa | + not evalUnfold(apa, false, _) and + head = apa.getHead() and + tail.getApprox() = getATail(apa, _) + ) + } or + TAccessPathCons2(TypedContent head1, TypedContent head2, int len) { + exists(AccessPathApproxCons apa | + evalUnfold(apa, false, _) and + not expensiveLen1to2unfolding(apa, _) and + apa.len() = len and + head1 = apa.getHead() and + head2 = getATail(apa, _).getHead() + ) + } or + TAccessPathCons1(TypedContent head, int len) { + exists(AccessPathApproxCons apa | + evalUnfold(apa, false, _) and + expensiveLen1to2unfolding(apa, _) and + apa.len() = len and + head = apa.getHead() + ) + } + +private newtype TPathNode = + TPathNodeMid(Node node, CallContext cc, SummaryCtx sc, AccessPath ap, Configuration config) { + // A PathNode is introduced by a source ... + Stage4::revFlow(node, config) and + config.isSource(node) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + ap = TAccessPathNil(getNodeDataFlowType(node)) + or + // ... or a step from an existing PathNode to another node. + exists(PathNodeMid mid | + pathStep(mid, node, cc, sc, ap) and + pragma[only_bind_into](config) = mid.getConfiguration() and + Stage4::revFlow(node, _, _, ap.getApprox(), pragma[only_bind_into](config)) + ) + } or + TPathNodeSink(Node node, Configuration config) { + pragma[only_bind_into](config).isSink(node) and + Stage4::revFlow(node, pragma[only_bind_into](config)) and + ( + // A sink that is also a source ... + config.isSource(node) + or + // ... 
or a sink that can be reached from a source + exists(PathNodeMid mid | + pathStep(mid, node, _, _, TAccessPathNil(_)) and + pragma[only_bind_into](config) = mid.getConfiguration() + ) + ) + } + +/** + * A list of `TypedContent`s followed by a `DataFlowType`. If data flows from a + * source to a given node with a given `AccessPath`, this indicates the sequence + * of dereference operations needed to get from the value in the node to the + * tracked object. The final type indicates the type of the tracked object. + */ +abstract private class AccessPath extends TAccessPath { + /** Gets the head of this access path, if any. */ + abstract TypedContent getHead(); + + /** Gets the tail of this access path, if any. */ + abstract AccessPath getTail(); + + /** Gets the front of this access path. */ + abstract AccessPathFront getFront(); + + /** Gets the approximation of this access path. */ + abstract AccessPathApprox getApprox(); + + /** Gets the length of this access path. */ + abstract int length(); + + /** Gets a textual representation of this access path. */ + abstract string toString(); + + /** Gets the access path obtained by popping `tc` from this access path, if any. */ + final AccessPath pop(TypedContent tc) { + result = this.getTail() and + tc = this.getHead() + } + + /** Gets the access path obtained by pushing `tc` onto this access path. 
*/ + final AccessPath push(TypedContent tc) { this = result.pop(tc) } +} + +private class AccessPathNil extends AccessPath, TAccessPathNil { + private DataFlowType t; + + AccessPathNil() { this = TAccessPathNil(t) } + + DataFlowType getType() { result = t } + + override TypedContent getHead() { none() } + + override AccessPath getTail() { none() } + + override AccessPathFrontNil getFront() { result = TFrontNil(t) } + + override AccessPathApproxNil getApprox() { result = TNil(t) } + + override int length() { result = 0 } + + override string toString() { result = concat(": " + ppReprType(t)) } +} + +private class AccessPathCons extends AccessPath, TAccessPathCons { + private TypedContent head; + private AccessPath tail; + + AccessPathCons() { this = TAccessPathCons(head, tail) } + + override TypedContent getHead() { result = head } + + override AccessPath getTail() { result = tail } + + override AccessPathFrontHead getFront() { result = TFrontHead(head) } + + override AccessPathApproxCons getApprox() { + result = TConsNil(head, tail.(AccessPathNil).getType()) + or + result = TConsCons(head, tail.getHead(), this.length()) + or + result = TCons1(head, this.length()) + } + + override int length() { result = 1 + tail.length() } + + private string toStringImpl(boolean needsSuffix) { + exists(DataFlowType t | + tail = TAccessPathNil(t) and + needsSuffix = false and + result = head.toString() + "]" + concat(" : " + ppReprType(t)) + ) + or + result = head + ", " + tail.(AccessPathCons).toStringImpl(needsSuffix) + or + exists(TypedContent tc2, TypedContent tc3, int len | tail = TAccessPathCons2(tc2, tc3, len) | + result = head + ", " + tc2 + ", " + tc3 + ", ... (" and len > 2 and needsSuffix = true + or + result = head + ", " + tc2 + ", " + tc3 + "]" and len = 2 and needsSuffix = false + ) + or + exists(TypedContent tc2, int len | tail = TAccessPathCons1(tc2, len) | + result = head + ", " + tc2 + ", ... 
(" and len > 1 and needsSuffix = true + or + result = head + ", " + tc2 + "]" and len = 1 and needsSuffix = false + ) + } + + override string toString() { + result = "[" + this.toStringImpl(true) + length().toString() + ")]" + or + result = "[" + this.toStringImpl(false) + } +} + +private class AccessPathCons2 extends AccessPath, TAccessPathCons2 { + private TypedContent head1; + private TypedContent head2; + private int len; + + AccessPathCons2() { this = TAccessPathCons2(head1, head2, len) } + + override TypedContent getHead() { result = head1 } + + override AccessPath getTail() { + Stage4::consCand(head1, result.getApprox(), _) and + result.getHead() = head2 and + result.length() = len - 1 + } + + override AccessPathFrontHead getFront() { result = TFrontHead(head1) } + + override AccessPathApproxCons getApprox() { + result = TConsCons(head1, head2, len) or + result = TCons1(head1, len) + } + + override int length() { result = len } + + override string toString() { + if len = 2 + then result = "[" + head1.toString() + ", " + head2.toString() + "]" + else + result = "[" + head1.toString() + ", " + head2.toString() + ", ... (" + len.toString() + ")]" + } +} + +private class AccessPathCons1 extends AccessPath, TAccessPathCons1 { + private TypedContent head; + private int len; + + AccessPathCons1() { this = TAccessPathCons1(head, len) } + + override TypedContent getHead() { result = head } + + override AccessPath getTail() { + Stage4::consCand(head, result.getApprox(), _) and result.length() = len - 1 + } + + override AccessPathFrontHead getFront() { result = TFrontHead(head) } + + override AccessPathApproxCons getApprox() { result = TCons1(head, len) } + + override int length() { result = len } + + override string toString() { + if len = 1 + then result = "[" + head.toString() + "]" + else result = "[" + head.toString() + ", ... 
(" + len.toString() + ")]" + } +} + +/** + * A `Node` augmented with a call context (except for sinks), an access path, and a configuration. + * Only those `PathNode`s that are reachable from a source are generated. + */ +class PathNode extends TPathNode { + /** Gets a textual representation of this element. */ + string toString() { none() } + + /** + * Gets a textual representation of this element, including a textual + * representation of the call context. + */ + string toStringWithContext() { none() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + none() + } + + /** Gets the underlying `Node`. */ + Node getNode() { none() } + + /** Gets the associated configuration. */ + Configuration getConfiguration() { none() } + + private predicate isHidden() { + hiddenNode(this.getNode()) and + not this.isSource() and + not this instanceof PathNodeSink + } + + private PathNode getASuccessorIfHidden() { + this.isHidden() and + result = this.(PathNodeImpl).getASuccessorImpl() + } + + /** Gets a successor of this node, if any. */ + final PathNode getASuccessor() { + result = this.(PathNodeImpl).getASuccessorImpl().getASuccessorIfHidden*() and + not this.isHidden() and + not result.isHidden() + } + + /** Holds if this node is a source. 
*/ + predicate isSource() { none() } +} + +abstract private class PathNodeImpl extends PathNode { + abstract PathNode getASuccessorImpl(); + + private string ppAp() { + this instanceof PathNodeSink and result = "" + or + exists(string s | s = this.(PathNodeMid).getAp().toString() | + if s = "" then result = "" else result = " " + s + ) + } + + private string ppCtx() { + this instanceof PathNodeSink and result = "" + or + result = " <" + this.(PathNodeMid).getCallContext().toString() + ">" + } + + override string toString() { result = this.getNode().toString() + ppAp() } + + override string toStringWithContext() { result = this.getNode().toString() + ppAp() + ppCtx() } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.getNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** Holds if `n` can reach a sink. */ +private predicate reach(PathNode n) { n instanceof PathNodeSink or reach(n.getASuccessor()) } + +/** Holds if `n1.getASuccessor() = n2` and `n2` can reach a sink. */ +private predicate pathSucc(PathNode n1, PathNode n2) { n1.getASuccessor() = n2 and reach(n2) } + +private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2) + +/** + * Provides the query predicates needed to include a graph in a path-problem query. + */ +module PathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */ + query predicate edges(PathNode a, PathNode b) { pathSucc(a, b) } + + /** Holds if `n` is a node in the graph of data flow path explanations. */ + query predicate nodes(PathNode n, string key, string val) { + reach(n) and key = "semmle.label" and val = n.toString() + } +} + +/** + * An intermediate flow graph node. This is a tuple consisting of a `Node`, + * a `CallContext`, a `SummaryCtx`, an `AccessPath`, and a `Configuration`. 
+ */ +private class PathNodeMid extends PathNodeImpl, TPathNodeMid { + Node node; + CallContext cc; + SummaryCtx sc; + AccessPath ap; + Configuration config; + + PathNodeMid() { this = TPathNodeMid(node, cc, sc, ap, config) } + + override Node getNode() { result = node } + + CallContext getCallContext() { result = cc } + + SummaryCtx getSummaryCtx() { result = sc } + + AccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + private PathNodeMid getSuccMid() { + pathStep(this, result.getNode(), result.getCallContext(), result.getSummaryCtx(), result.getAp()) and + result.getConfiguration() = unbindConf(this.getConfiguration()) + } + + override PathNodeImpl getASuccessorImpl() { + // an intermediate step to another intermediate node + result = getSuccMid() + or + // a final step to a sink via zero steps means we merge the last two steps to prevent trivial-looking edges + exists(PathNodeMid mid, PathNodeSink sink | + mid = getSuccMid() and + mid.getNode() = sink.getNode() and + mid.getAp() instanceof AccessPathNil and + sink.getConfiguration() = unbindConf(mid.getConfiguration()) and + result = sink + ) + } + + override predicate isSource() { + config.isSource(node) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + ap instanceof AccessPathNil + } +} + +/** + * A flow graph node corresponding to a sink. This is disjoint from the + * intermediate nodes in order to uniquely correspond to a given sink by + * excluding the `CallContext`. + */ +private class PathNodeSink extends PathNodeImpl, TPathNodeSink { + Node node; + Configuration config; + + PathNodeSink() { this = TPathNodeSink(node, config) } + + override Node getNode() { result = node } + + override Configuration getConfiguration() { result = config } + + override PathNode getASuccessorImpl() { none() } + + override predicate isSource() { config.isSource(node) } +} + +/** + * Holds if data may flow from `mid` to `node`. 
The last step in or out of + * a callable is recorded by `cc`. + */ +private predicate pathStep(PathNodeMid mid, Node node, CallContext cc, SummaryCtx sc, AccessPath ap) { + exists(AccessPath ap0, Node midnode, Configuration conf, LocalCallContext localCC | + midnode = mid.getNode() and + conf = mid.getConfiguration() and + cc = mid.getCallContext() and + sc = mid.getSummaryCtx() and + localCC = getLocalCallContext(pragma[only_bind_out](cc), getNodeEnclosingCallable(midnode)) and + ap0 = mid.getAp() + | + localFlowBigStep(midnode, node, true, _, conf, localCC) and + ap = ap0 + or + localFlowBigStep(midnode, node, false, ap.getFront(), conf, localCC) and + ap0 instanceof AccessPathNil + ) + or + jumpStep(mid.getNode(), node, mid.getConfiguration()) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + ap = mid.getAp() + or + additionalJumpStep(mid.getNode(), node, mid.getConfiguration()) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + mid.getAp() instanceof AccessPathNil and + ap = TAccessPathNil(getNodeDataFlowType(node)) + or + exists(TypedContent tc | pathStoreStep(mid, node, ap.pop(tc), tc, cc)) and + sc = mid.getSummaryCtx() + or + exists(TypedContent tc | pathReadStep(mid, node, ap.push(tc), tc, cc)) and + sc = mid.getSummaryCtx() + or + pathIntoCallable(mid, node, _, cc, sc, _) and ap = mid.getAp() + or + pathOutOfCallable(mid, node, cc) and ap = mid.getAp() and sc instanceof SummaryCtxNone + or + pathThroughCallable(mid, node, cc, ap) and sc = mid.getSummaryCtx() +} + +pragma[nomagic] +private predicate pathReadStep( + PathNodeMid mid, Node node, AccessPath ap0, TypedContent tc, CallContext cc +) { + ap0 = mid.getAp() and + tc = ap0.getHead() and + Stage4::readStepCand(mid.getNode(), tc.getContent(), node, mid.getConfiguration()) and + cc = mid.getCallContext() +} + +pragma[nomagic] +private predicate pathStoreStep( + PathNodeMid mid, Node node, AccessPath ap0, TypedContent tc, CallContext cc +) { + ap0 = 
mid.getAp() and + Stage4::storeStepCand(mid.getNode(), _, tc, node, _, mid.getConfiguration()) and + cc = mid.getCallContext() +} + +private predicate pathOutOfCallable0( + PathNodeMid mid, ReturnPosition pos, CallContext innercc, AccessPathApprox apa, + Configuration config +) { + pos = getReturnPosition(mid.getNode()) and + innercc = mid.getCallContext() and + innercc instanceof CallContextNoCall and + apa = mid.getAp().getApprox() and + config = mid.getConfiguration() +} + +pragma[nomagic] +private predicate pathOutOfCallable1( + PathNodeMid mid, DataFlowCall call, ReturnKindExt kind, CallContext cc, AccessPathApprox apa, + Configuration config +) { + exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc | + pathOutOfCallable0(mid, pos, innercc, apa, config) and + c = pos.getCallable() and + kind = pos.getKind() and + resolveReturn(innercc, c, call) + | + if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext() + ) +} + +pragma[noinline] +private Node getAnOutNodeFlow( + ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config +) { + result = kind.getAnOutNode(call) and + Stage4::revFlow(result, _, _, apa, config) +} + +/** + * Holds if data may flow from `mid` to `out`. The last step of this path + * is a return from a callable and is recorded by `cc`, if needed. + */ +pragma[noinline] +private predicate pathOutOfCallable(PathNodeMid mid, Node out, CallContext cc) { + exists(ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config | + pathOutOfCallable1(mid, call, kind, cc, apa, config) and + out = getAnOutNodeFlow(kind, call, apa, config) + ) +} + +/** + * Holds if data may flow from `mid` to the `i`th argument of `call` in `cc`. 
+ */ +pragma[noinline] +private predicate pathIntoArg( + PathNodeMid mid, int i, CallContext cc, DataFlowCall call, AccessPath ap, AccessPathApprox apa +) { + exists(ArgNode arg | + arg = mid.getNode() and + cc = mid.getCallContext() and + arg.argumentOf(call, i) and + ap = mid.getAp() and + apa = ap.getApprox() + ) +} + +pragma[noinline] +private predicate parameterCand( + DataFlowCallable callable, int i, AccessPathApprox apa, Configuration config +) { + exists(ParamNode p | + Stage4::revFlow(p, _, _, apa, config) and + p.isParameterOf(callable, i) + ) +} + +pragma[nomagic] +private predicate pathIntoCallable0( + PathNodeMid mid, DataFlowCallable callable, int i, CallContext outercc, DataFlowCall call, + AccessPath ap +) { + exists(AccessPathApprox apa | + pathIntoArg(mid, i, outercc, call, ap, apa) and + callable = resolveCall(call, outercc) and + parameterCand(callable, any(int j | j <= i and j >= i), apa, mid.getConfiguration()) + ) +} + +/** + * Holds if data may flow from `mid` to `p` through `call`. The contexts + * before and after entering the callable are `outercc` and `innercc`, + * respectively. + */ +private predicate pathIntoCallable( + PathNodeMid mid, ParamNode p, CallContext outercc, CallContextCall innercc, SummaryCtx sc, + DataFlowCall call +) { + exists(int i, DataFlowCallable callable, AccessPath ap | + pathIntoCallable0(mid, callable, i, outercc, call, ap) and + p.isParameterOf(callable, i) and + ( + sc = TSummaryCtxSome(p, ap) + or + not exists(TSummaryCtxSome(p, ap)) and + sc = TSummaryCtxNone() + ) + | + if recordDataFlowCallSite(call, callable) + then innercc = TSpecificCall(call) + else innercc = TSomeCall() + ) +} + +/** Holds if data may flow from a parameter given by `sc` to a return of kind `kind`. 
*/ +pragma[nomagic] +private predicate paramFlowsThrough( + ReturnKindExt kind, CallContextCall cc, SummaryCtxSome sc, AccessPath ap, AccessPathApprox apa, + Configuration config +) { + exists(PathNodeMid mid, ReturnNodeExt ret, int pos | + mid.getNode() = ret and + kind = ret.getKind() and + cc = mid.getCallContext() and + sc = mid.getSummaryCtx() and + config = mid.getConfiguration() and + ap = mid.getAp() and + apa = ap.getApprox() and + pos = sc.getParameterPos() and + not kind.(ParamUpdateReturnKind).getPosition() = pos + ) +} + +pragma[nomagic] +private predicate pathThroughCallable0( + DataFlowCall call, PathNodeMid mid, ReturnKindExt kind, CallContext cc, AccessPath ap, + AccessPathApprox apa +) { + exists(CallContext innercc, SummaryCtx sc | + pathIntoCallable(mid, _, cc, innercc, sc, call) and + paramFlowsThrough(kind, innercc, sc, ap, apa, unbindConf(mid.getConfiguration())) + ) +} + +/** + * Holds if data may flow from `mid` through a callable to the node `out`. + * The context `cc` is restored to its value prior to entering the callable. + */ +pragma[noinline] +private predicate pathThroughCallable(PathNodeMid mid, Node out, CallContext cc, AccessPath ap) { + exists(DataFlowCall call, ReturnKindExt kind, AccessPathApprox apa | + pathThroughCallable0(call, mid, kind, cc, ap, apa) and + out = getAnOutNodeFlow(kind, call, apa, unbindConf(mid.getConfiguration())) + ) +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. + * + * Will only have results if `configuration` has non-empty sources and + * sinks. + */ +private predicate flowsTo( + PathNode flowsource, PathNodeSink flowsink, Node source, Node sink, Configuration configuration +) { + flowsource.isSource() and + flowsource.getConfiguration() = configuration and + flowsource.getNode() = source and + (flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and + flowsink.getNode() = sink +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. 
+ * + * Will only have results if `configuration` has non-empty sources and + * sinks. + */ +predicate flowsTo(Node source, Node sink, Configuration configuration) { + flowsTo(_, _, source, sink, configuration) +} + +private predicate finalStats(boolean fwd, int nodes, int fields, int conscand, int tuples) { + fwd = true and + nodes = count(Node n0 | exists(PathNode pn | pn.getNode() = n0)) and + fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0)) and + conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap)) and + tuples = count(PathNode pn) + or + fwd = false and + nodes = count(Node n0 | exists(PathNode pn | pn.getNode() = n0 and reach(pn))) and + fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0 and reach(pn))) and + conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap and reach(pn))) and + tuples = count(PathNode pn | reach(pn)) +} + +/** + * INTERNAL: Only for debugging. + * + * Calculates per-stage metrics for data flow. 
+ */ +predicate stageStats( + int n, string stage, int nodes, int fields, int conscand, int tuples, Configuration config +) { + stage = "1 Fwd" and n = 10 and Stage1::stats(true, nodes, fields, conscand, tuples, config) + or + stage = "1 Rev" and n = 15 and Stage1::stats(false, nodes, fields, conscand, tuples, config) + or + stage = "2 Fwd" and n = 20 and Stage2::stats(true, nodes, fields, conscand, tuples, config) + or + stage = "2 Rev" and n = 25 and Stage2::stats(false, nodes, fields, conscand, tuples, config) + or + stage = "3 Fwd" and n = 30 and Stage3::stats(true, nodes, fields, conscand, tuples, config) + or + stage = "3 Rev" and n = 35 and Stage3::stats(false, nodes, fields, conscand, tuples, config) + or + stage = "4 Fwd" and n = 40 and Stage4::stats(true, nodes, fields, conscand, tuples, config) + or + stage = "4 Rev" and n = 45 and Stage4::stats(false, nodes, fields, conscand, tuples, config) + or + stage = "5 Fwd" and n = 50 and finalStats(true, nodes, fields, conscand, tuples) + or + stage = "5 Rev" and n = 55 and finalStats(false, nodes, fields, conscand, tuples) +} + +private module FlowExploration { + private predicate callableStep(DataFlowCallable c1, DataFlowCallable c2, Configuration config) { + exists(Node node1, Node node2 | + jumpStep(node1, node2, config) + or + additionalJumpStep(node1, node2, config) + or + // flow into callable + viableParamArg(_, node2, node1) + or + // flow out of a callable + viableReturnPosOut(_, getReturnPosition(node1), node2) + | + c1 = getNodeEnclosingCallable(node1) and + c2 = getNodeEnclosingCallable(node2) and + c1 != c2 + ) + } + + private predicate interestingCallableSrc(DataFlowCallable c, Configuration config) { + exists(Node n | config.isSource(n) and c = getNodeEnclosingCallable(n)) + or + exists(DataFlowCallable mid | + interestingCallableSrc(mid, config) and callableStep(mid, c, config) + ) + } + + private predicate interestingCallableSink(DataFlowCallable c, Configuration config) { + exists(Node n | 
config.isSink(n) and c = getNodeEnclosingCallable(n)) + or + exists(DataFlowCallable mid | + interestingCallableSink(mid, config) and callableStep(c, mid, config) + ) + } + + private newtype TCallableExt = + TCallable(DataFlowCallable c, Configuration config) { + interestingCallableSrc(c, config) or + interestingCallableSink(c, config) + } or + TCallableSrc() or + TCallableSink() + + private predicate callableExtSrc(TCallableSrc src) { any() } + + private predicate callableExtSink(TCallableSink sink) { any() } + + private predicate callableExtStepFwd(TCallableExt ce1, TCallableExt ce2) { + exists(DataFlowCallable c1, DataFlowCallable c2, Configuration config | + callableStep(c1, c2, config) and + ce1 = TCallable(c1, pragma[only_bind_into](config)) and + ce2 = TCallable(c2, pragma[only_bind_into](config)) + ) + or + exists(Node n, Configuration config | + ce1 = TCallableSrc() and + config.isSource(n) and + ce2 = TCallable(getNodeEnclosingCallable(n), config) + ) + or + exists(Node n, Configuration config | + ce2 = TCallableSink() and + config.isSink(n) and + ce1 = TCallable(getNodeEnclosingCallable(n), config) + ) + } + + private predicate callableExtStepRev(TCallableExt ce1, TCallableExt ce2) { + callableExtStepFwd(ce2, ce1) + } + + private int distSrcExt(TCallableExt c) = + shortestDistances(callableExtSrc/1, callableExtStepFwd/2)(_, c, result) + + private int distSinkExt(TCallableExt c) = + shortestDistances(callableExtSink/1, callableExtStepRev/2)(_, c, result) + + private int distSrc(DataFlowCallable c, Configuration config) { + result = distSrcExt(TCallable(c, config)) - 1 + } + + private int distSink(DataFlowCallable c, Configuration config) { + result = distSinkExt(TCallable(c, config)) - 1 + } + + private newtype TPartialAccessPath = + TPartialNil(DataFlowType t) or + TPartialCons(TypedContent tc, int len) { len in [1 .. 
accessPathLimit()] } + + /** + * Conceptually a list of `TypedContent`s followed by a `Type`, but only the first + * element of the list and its length are tracked. If data flows from a source to + * a given node with a given `AccessPath`, this indicates the sequence of + * dereference operations needed to get from the value in the node to the + * tracked object. The final type indicates the type of the tracked object. + */ + private class PartialAccessPath extends TPartialAccessPath { + abstract string toString(); + + TypedContent getHead() { this = TPartialCons(result, _) } + + int len() { + this = TPartialNil(_) and result = 0 + or + this = TPartialCons(_, result) + } + + DataFlowType getType() { + this = TPartialNil(result) + or + exists(TypedContent head | this = TPartialCons(head, _) | result = head.getContainerType()) + } + } + + private class PartialAccessPathNil extends PartialAccessPath, TPartialNil { + override string toString() { + exists(DataFlowType t | this = TPartialNil(t) | result = concat(": " + ppReprType(t))) + } + } + + private class PartialAccessPathCons extends PartialAccessPath, TPartialCons { + override string toString() { + exists(TypedContent tc, int len | this = TPartialCons(tc, len) | + if len = 1 + then result = "[" + tc.toString() + "]" + else result = "[" + tc.toString() + ", ... (" + len.toString() + ")]" + ) + } + } + + private newtype TRevPartialAccessPath = + TRevPartialNil() or + TRevPartialCons(Content c, int len) { len in [1 .. accessPathLimit()] } + + /** + * Conceptually a list of `Content`s, but only the first + * element of the list and its length are tracked. 
+ */ + private class RevPartialAccessPath extends TRevPartialAccessPath { + abstract string toString(); + + Content getHead() { this = TRevPartialCons(result, _) } + + int len() { + this = TRevPartialNil() and result = 0 + or + this = TRevPartialCons(_, result) + } + } + + private class RevPartialAccessPathNil extends RevPartialAccessPath, TRevPartialNil { + override string toString() { result = "" } + } + + private class RevPartialAccessPathCons extends RevPartialAccessPath, TRevPartialCons { + override string toString() { + exists(Content c, int len | this = TRevPartialCons(c, len) | + if len = 1 + then result = "[" + c.toString() + "]" + else result = "[" + c.toString() + ", ... (" + len.toString() + ")]" + ) + } + } + + private newtype TSummaryCtx1 = + TSummaryCtx1None() or + TSummaryCtx1Param(ParamNode p) + + private newtype TSummaryCtx2 = + TSummaryCtx2None() or + TSummaryCtx2Some(PartialAccessPath ap) + + private newtype TRevSummaryCtx1 = + TRevSummaryCtx1None() or + TRevSummaryCtx1Some(ReturnPosition pos) + + private newtype TRevSummaryCtx2 = + TRevSummaryCtx2None() or + TRevSummaryCtx2Some(RevPartialAccessPath ap) + + private newtype TPartialPathNode = + TPartialPathNodeFwd( + Node node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, PartialAccessPath ap, + Configuration config + ) { + config.isSource(node) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + ap = TPartialNil(getNodeDataFlowType(node)) and + not fullBarrier(node, config) and + exists(config.explorationLimit()) + or + partialPathNodeMk0(node, cc, sc1, sc2, ap, config) and + distSrc(getNodeEnclosingCallable(node), config) <= config.explorationLimit() + } or + TPartialPathNodeRev( + Node node, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2, RevPartialAccessPath ap, + Configuration config + ) { + config.isSink(node) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + ap = TRevPartialNil() and + not fullBarrier(node, config) 
and + exists(config.explorationLimit()) + or + exists(PartialPathNodeRev mid | + revPartialPathStep(mid, node, sc1, sc2, ap, config) and + not clearsContentCached(node, ap.getHead()) and + not fullBarrier(node, config) and + distSink(getNodeEnclosingCallable(node), config) <= config.explorationLimit() + ) + } + + pragma[nomagic] + private predicate partialPathNodeMk0( + Node node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, PartialAccessPath ap, + Configuration config + ) { + exists(PartialPathNodeFwd mid | + partialPathStep(mid, node, cc, sc1, sc2, ap, config) and + not fullBarrier(node, config) and + not clearsContentCached(node, ap.getHead().getContent()) and + if node instanceof CastingNode + then compatibleTypes(getNodeDataFlowType(node), ap.getType()) + else any() + ) + } + + /** + * A `Node` augmented with a call context, an access path, and a configuration. + */ + class PartialPathNode extends TPartialPathNode { + /** Gets a textual representation of this element. */ + string toString() { result = this.getNode().toString() + this.ppAp() } + + /** + * Gets a textual representation of this element, including a textual + * representation of the call context. + */ + string toStringWithContext() { result = this.getNode().toString() + this.ppAp() + this.ppCtx() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.getNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + + /** Gets the underlying `Node`. */ + Node getNode() { none() } + + /** Gets the associated configuration. 
*/ + Configuration getConfiguration() { none() } + + /** Gets a successor of this node, if any. */ + PartialPathNode getASuccessor() { none() } + + /** + * Gets the approximate distance to the nearest source measured in number + * of interprocedural steps. + */ + int getSourceDistance() { + result = distSrc(getNodeEnclosingCallable(this.getNode()), this.getConfiguration()) + } + + /** + * Gets the approximate distance to the nearest sink measured in number + * of interprocedural steps. + */ + int getSinkDistance() { + result = distSink(getNodeEnclosingCallable(this.getNode()), this.getConfiguration()) + } + + private string ppAp() { + exists(string s | + s = this.(PartialPathNodeFwd).getAp().toString() or + s = this.(PartialPathNodeRev).getAp().toString() + | + if s = "" then result = "" else result = " " + s + ) + } + + private string ppCtx() { + result = " <" + this.(PartialPathNodeFwd).getCallContext().toString() + ">" + } + + /** Holds if this is a source in a forward-flow path. */ + predicate isFwdSource() { this.(PartialPathNodeFwd).isSource() } + + /** Holds if this is a sink in a reverse-flow path. */ + predicate isRevSink() { this.(PartialPathNodeRev).isSink() } + } + + /** + * Provides the query predicates needed to include a graph in a path-problem query. + */ + module PartialPathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. 
*/ + query predicate edges(PartialPathNode a, PartialPathNode b) { a.getASuccessor() = b } + } + + private class PartialPathNodeFwd extends PartialPathNode, TPartialPathNodeFwd { + Node node; + CallContext cc; + TSummaryCtx1 sc1; + TSummaryCtx2 sc2; + PartialAccessPath ap; + Configuration config; + + PartialPathNodeFwd() { this = TPartialPathNodeFwd(node, cc, sc1, sc2, ap, config) } + + override Node getNode() { result = node } + + CallContext getCallContext() { result = cc } + + TSummaryCtx1 getSummaryCtx1() { result = sc1 } + + TSummaryCtx2 getSummaryCtx2() { result = sc2 } + + PartialAccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + override PartialPathNodeFwd getASuccessor() { + partialPathStep(this, result.getNode(), result.getCallContext(), result.getSummaryCtx1(), + result.getSummaryCtx2(), result.getAp(), result.getConfiguration()) + } + + predicate isSource() { + config.isSource(node) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + ap instanceof TPartialNil + } + } + + private class PartialPathNodeRev extends PartialPathNode, TPartialPathNodeRev { + Node node; + TRevSummaryCtx1 sc1; + TRevSummaryCtx2 sc2; + RevPartialAccessPath ap; + Configuration config; + + PartialPathNodeRev() { this = TPartialPathNodeRev(node, sc1, sc2, ap, config) } + + override Node getNode() { result = node } + + TRevSummaryCtx1 getSummaryCtx1() { result = sc1 } + + TRevSummaryCtx2 getSummaryCtx2() { result = sc2 } + + RevPartialAccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + override PartialPathNodeRev getASuccessor() { + revPartialPathStep(result, this.getNode(), this.getSummaryCtx1(), this.getSummaryCtx2(), + this.getAp(), this.getConfiguration()) + } + + predicate isSink() { + config.isSink(node) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + ap = TRevPartialNil() + } + } + + private 
predicate partialPathStep( + PartialPathNodeFwd mid, Node node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, + PartialAccessPath ap, Configuration config + ) { + not isUnreachableInCallCached(node, cc.(CallContextSpecificCall).getCall()) and + ( + localFlowStep(mid.getNode(), node, config) and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalLocalFlowStep(mid.getNode(), node, config) and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(getNodeDataFlowType(node)) and + config = mid.getConfiguration() + ) + or + jumpStep(mid.getNode(), node, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalJumpStep(mid.getNode(), node, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(getNodeDataFlowType(node)) and + config = mid.getConfiguration() + or + partialPathStoreStep(mid, _, _, node, ap) and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + config = mid.getConfiguration() + or + exists(PartialAccessPath ap0, TypedContent tc | + partialPathReadStep(mid, ap0, tc, node, cc, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + apConsFwd(ap, tc, ap0, config) and + compatibleTypes(ap.getType(), getNodeDataFlowType(node)) + ) + or + partialPathIntoCallable(mid, node, _, cc, sc1, sc2, _, ap, config) + or + partialPathOutOfCallable(mid, node, cc, ap, config) and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() + or + partialPathThroughCallable(mid, node, cc, ap, config) and + sc1 = 
mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() + } + + bindingset[result, i] + private int unbindInt(int i) { i <= result and i >= result } + + pragma[inline] + private predicate partialPathStoreStep( + PartialPathNodeFwd mid, PartialAccessPath ap1, TypedContent tc, Node node, PartialAccessPath ap2 + ) { + exists(Node midNode, DataFlowType contentType | + midNode = mid.getNode() and + ap1 = mid.getAp() and + store(midNode, tc, node, contentType) and + ap2.getHead() = tc and + ap2.len() = unbindInt(ap1.len() + 1) and + compatibleTypes(ap1.getType(), contentType) + ) + } + + pragma[nomagic] + private predicate apConsFwd( + PartialAccessPath ap1, TypedContent tc, PartialAccessPath ap2, Configuration config + ) { + exists(PartialPathNodeFwd mid | + partialPathStoreStep(mid, ap1, tc, _, ap2) and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate partialPathReadStep( + PartialPathNodeFwd mid, PartialAccessPath ap, TypedContent tc, Node node, CallContext cc, + Configuration config + ) { + exists(Node midNode | + midNode = mid.getNode() and + ap = mid.getAp() and + read(midNode, tc.getContent(), node) and + ap.getHead() = tc and + config = mid.getConfiguration() and + cc = mid.getCallContext() + ) + } + + private predicate partialPathOutOfCallable0( + PartialPathNodeFwd mid, ReturnPosition pos, CallContext innercc, PartialAccessPath ap, + Configuration config + ) { + pos = getReturnPosition(mid.getNode()) and + innercc = mid.getCallContext() and + innercc instanceof CallContextNoCall and + ap = mid.getAp() and + config = mid.getConfiguration() + } + + pragma[nomagic] + private predicate partialPathOutOfCallable1( + PartialPathNodeFwd mid, DataFlowCall call, ReturnKindExt kind, CallContext cc, + PartialAccessPath ap, Configuration config + ) { + exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc | + partialPathOutOfCallable0(mid, pos, innercc, ap, config) and + c = pos.getCallable() and + kind = pos.getKind() and + 
resolveReturn(innercc, c, call) + | + if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext() + ) + } + + private predicate partialPathOutOfCallable( + PartialPathNodeFwd mid, Node out, CallContext cc, PartialAccessPath ap, Configuration config + ) { + exists(ReturnKindExt kind, DataFlowCall call | + partialPathOutOfCallable1(mid, call, kind, cc, ap, config) + | + out = kind.getAnOutNode(call) + ) + } + + pragma[noinline] + private predicate partialPathIntoArg( + PartialPathNodeFwd mid, int i, CallContext cc, DataFlowCall call, PartialAccessPath ap, + Configuration config + ) { + exists(ArgNode arg | + arg = mid.getNode() and + cc = mid.getCallContext() and + arg.argumentOf(call, i) and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate partialPathIntoCallable0( + PartialPathNodeFwd mid, DataFlowCallable callable, int i, CallContext outercc, + DataFlowCall call, PartialAccessPath ap, Configuration config + ) { + partialPathIntoArg(mid, i, outercc, call, ap, config) and + callable = resolveCall(call, outercc) + } + + private predicate partialPathIntoCallable( + PartialPathNodeFwd mid, ParamNode p, CallContext outercc, CallContextCall innercc, + TSummaryCtx1 sc1, TSummaryCtx2 sc2, DataFlowCall call, PartialAccessPath ap, + Configuration config + ) { + exists(int i, DataFlowCallable callable | + partialPathIntoCallable0(mid, callable, i, outercc, call, ap, config) and + p.isParameterOf(callable, i) and + sc1 = TSummaryCtx1Param(p) and + sc2 = TSummaryCtx2Some(ap) + | + if recordDataFlowCallSite(call, callable) + then innercc = TSpecificCall(call) + else innercc = TSomeCall() + ) + } + + pragma[nomagic] + private predicate paramFlowsThroughInPartialPath( + ReturnKindExt kind, CallContextCall cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, + PartialAccessPath ap, Configuration config + ) { + exists(PartialPathNodeFwd mid, ReturnNodeExt ret | + mid.getNode() = ret and + kind = 
ret.getKind() and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + config = mid.getConfiguration() and + ap = mid.getAp() + ) + } + + pragma[noinline] + private predicate partialPathThroughCallable0( + DataFlowCall call, PartialPathNodeFwd mid, ReturnKindExt kind, CallContext cc, + PartialAccessPath ap, Configuration config + ) { + exists(ParamNode p, CallContext innercc, TSummaryCtx1 sc1, TSummaryCtx2 sc2 | + partialPathIntoCallable(mid, p, cc, innercc, sc1, sc2, call, _, config) and + paramFlowsThroughInPartialPath(kind, innercc, sc1, sc2, ap, config) + ) + } + + private predicate partialPathThroughCallable( + PartialPathNodeFwd mid, Node out, CallContext cc, PartialAccessPath ap, Configuration config + ) { + exists(DataFlowCall call, ReturnKindExt kind | + partialPathThroughCallable0(call, mid, kind, cc, ap, config) and + out = kind.getAnOutNode(call) + ) + } + + private predicate revPartialPathStep( + PartialPathNodeRev mid, Node node, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2, + RevPartialAccessPath ap, Configuration config + ) { + localFlowStep(node, mid.getNode(), config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalLocalFlowStep(node, mid.getNode(), config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + mid.getAp() instanceof RevPartialAccessPathNil and + ap = TRevPartialNil() and + config = mid.getConfiguration() + or + jumpStep(node, mid.getNode(), config) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalJumpStep(node, mid.getNode(), config) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + mid.getAp() instanceof RevPartialAccessPathNil and + ap = TRevPartialNil() and + config = mid.getConfiguration() + or + revPartialPathReadStep(mid, _, _, node, ap) and + sc1 = 
mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + config = mid.getConfiguration() + or + exists(RevPartialAccessPath ap0, Content c | + revPartialPathStoreStep(mid, ap0, c, node, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + apConsRev(ap, c, ap0, config) + ) + or + exists(ParamNode p | + mid.getNode() = p and + viableParamArg(_, p, node) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + or + exists(ReturnPosition pos | + revPartialPathIntoReturn(mid, pos, sc1, sc2, _, ap, config) and + pos = getReturnPosition(node) + ) + or + revPartialPathThroughCallable(mid, node, ap, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() + } + + pragma[inline] + private predicate revPartialPathReadStep( + PartialPathNodeRev mid, RevPartialAccessPath ap1, Content c, Node node, RevPartialAccessPath ap2 + ) { + exists(Node midNode | + midNode = mid.getNode() and + ap1 = mid.getAp() and + read(node, c, midNode) and + ap2.getHead() = c and + ap2.len() = unbindInt(ap1.len() + 1) + ) + } + + pragma[nomagic] + private predicate apConsRev( + RevPartialAccessPath ap1, Content c, RevPartialAccessPath ap2, Configuration config + ) { + exists(PartialPathNodeRev mid | + revPartialPathReadStep(mid, ap1, c, _, ap2) and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate revPartialPathStoreStep( + PartialPathNodeRev mid, RevPartialAccessPath ap, Content c, Node node, Configuration config + ) { + exists(Node midNode, TypedContent tc | + midNode = mid.getNode() and + ap = mid.getAp() and + store(node, tc, midNode, _) and + ap.getHead() = c and + config = mid.getConfiguration() and + tc.getContent() = c + ) + } + + pragma[nomagic] + private predicate revPartialPathIntoReturn( + PartialPathNodeRev mid, ReturnPosition pos, TRevSummaryCtx1Some sc1, 
TRevSummaryCtx2Some sc2, + DataFlowCall call, RevPartialAccessPath ap, Configuration config + ) { + exists(Node out | + mid.getNode() = out and + viableReturnPosOut(call, pos, out) and + sc1 = TRevSummaryCtx1Some(pos) and + sc2 = TRevSummaryCtx2Some(ap) and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate revPartialPathFlowsThrough( + int pos, TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2, RevPartialAccessPath ap, + Configuration config + ) { + exists(PartialPathNodeRev mid, ParamNode p | + mid.getNode() = p and + p.isParameterOf(_, pos) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate revPartialPathThroughCallable0( + DataFlowCall call, PartialPathNodeRev mid, int pos, RevPartialAccessPath ap, + Configuration config + ) { + exists(TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2 | + revPartialPathIntoReturn(mid, _, sc1, sc2, call, _, config) and + revPartialPathFlowsThrough(pos, sc1, sc2, ap, config) + ) + } + + pragma[nomagic] + private predicate revPartialPathThroughCallable( + PartialPathNodeRev mid, ArgNode node, RevPartialAccessPath ap, Configuration config + ) { + exists(DataFlowCall call, int pos | + revPartialPathThroughCallable0(call, mid, pos, ap, config) and + node.argumentOf(call, pos) + ) + } +} + +import FlowExploration + +private predicate partialFlow( + PartialPathNode source, PartialPathNode node, Configuration configuration +) { + source.getConfiguration() = configuration and + source.isFwdSource() and + node = source.getASuccessor+() +} + +private predicate revPartialFlow( + PartialPathNode node, PartialPathNode sink, Configuration configuration +) { + sink.getConfiguration() = configuration and + sink.isRevSink() and + node.getASuccessor+() = sink +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/DataFlowImpl2.qll 
b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/DataFlowImpl2.qll
new file mode 100644
index 00000000000..7693fc72848
--- /dev/null
+++ b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/DataFlowImpl2.qll
@@ -0,0 +1,4157 @@
+/**
+ * Provides an implementation of global (interprocedural) data flow. This file
+ * re-exports the local (intraprocedural) data flow analysis from
+ * `DataFlowImplSpecific::Public` and adds a global analysis, mainly exposed
+ * through the `Configuration` class. This file exists in several identical
+ * copies, allowing queries to use multiple `Configuration` classes that depend
+ * on each other without introducing mutual recursion among those configurations.
+ */
+
+private import DataFlowImplCommon
+private import DataFlowImplSpecific::Private
+import DataFlowImplSpecific::Public
+
+/**
+ * A configuration of interprocedural data flow analysis. This defines
+ * sources, sinks, and any other configurable aspect of the analysis. Each
+ * use of the global data flow library must define its own unique extension
+ * of this abstract class. To create a configuration, extend this class with
+ * a subclass whose characteristic predicate is a unique singleton string.
+ * For example, write
+ *
+ * ```ql
+ * class MyAnalysisConfiguration extends DataFlow::Configuration {
+ *   MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" }
+ *   // Override `isSource` and `isSink`.
+ *   // Optionally override `isBarrier`.
+ *   // Optionally override `isAdditionalFlowStep`.
+ * }
+ * ```
+ * Conceptually, this defines a graph where the nodes are `DataFlow::Node`s and
+ * the edges are those data-flow steps that preserve the value of the node
+ * along with any additional edges defined by `isAdditionalFlowStep`.
+ * Specifying nodes in `isBarrier` will remove those nodes from the graph, and
+ * specifying nodes in `isBarrierIn` and/or `isBarrierOut` will remove in-going
+ * and/or out-going edges from those nodes, respectively.
+ *
+ * Then, to query whether there is flow between some `source` and `sink`,
+ * write
+ *
+ * ```ql
+ * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink))
+ * ```
+ *
+ * Multiple configurations can coexist, but two classes extending
+ * `DataFlow::Configuration` should never depend on each other. One of them
+ * should instead depend on a `DataFlow2::Configuration`, a
+ * `DataFlow3::Configuration`, or a `DataFlow4::Configuration`.
+ */
+abstract class Configuration extends string {
+  /**
+   * The characteristic predicate places no restriction on the string value
+   * (`any()`); concrete subclasses pin `this` to their own unique name, as in
+   * the example in the class documentation.
+   */
+  bindingset[this]
+  Configuration() { any() }
+
+  /**
+   * Holds if `source` is a relevant data flow source.
+   */
+  abstract predicate isSource(Node source);
+
+  /**
+   * Holds if `sink` is a relevant data flow sink.
+   */
+  abstract predicate isSink(Node sink);
+
+  /**
+   * Holds if data flow through `node` is prohibited. This completely removes
+   * `node` from the data flow graph.
+   */
+  predicate isBarrier(Node node) { none() }
+
+  /** Holds if data flow into `node` is prohibited. */
+  predicate isBarrierIn(Node node) { none() }
+
+  /** Holds if data flow out of `node` is prohibited. */
+  predicate isBarrierOut(Node node) { none() }
+
+  /** Holds if data flow through nodes guarded by `guard` is prohibited. */
+  predicate isBarrierGuard(BarrierGuard guard) { none() }
+
+  /**
+   * Holds if the additional flow step from `node1` to `node2` must be taken
+   * into account in the analysis.
+   */
+  predicate isAdditionalFlowStep(Node node1, Node node2) { none() }
+
+  /**
+   * Gets the virtual dispatch branching limit when calculating field flow.
+   * This can be overridden to a smaller value to improve performance (a
+   * value of 0 disables field flow), or a larger value to get more results.
+   */
+  int fieldFlowBranchLimit() { result = 2 }
+
+  /**
+   * Holds if data may flow from `source` to `sink` for this configuration.
+   */
+  predicate hasFlow(Node source, Node sink) { flowsTo(source, sink, this) }
+
+  /**
+   * Holds if data may flow from `source` to `sink` for this configuration.
+   *
+   * The corresponding paths are generated from the end-points and the graph
+   * included in the module `PathGraph`.
+   */
+  predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) }
+
+  /**
+   * Holds if data may flow from some source to `sink` for this configuration.
+   */
+  predicate hasFlowTo(Node sink) { hasFlow(_, sink) }
+
+  /**
+   * Holds if data may flow from some source to the node `exprNode(sink)` of
+   * the expression `sink` for this configuration.
+   */
+  predicate hasFlowToExpr(DataFlowExpr sink) { hasFlowTo(exprNode(sink)) }
+
+  /**
+   * Gets the exploration limit for `hasPartialFlow` and `hasPartialFlowRev`
+   * measured in approximate number of interprocedural steps.
+   *
+   * The default body is `none()`, so there is no limit value unless this
+   * predicate is overridden.
+   */
+  int explorationLimit() { none() }
+
+  /**
+   * Holds if there is a partial data flow path from `source` to `node`. The
+   * approximate distance between `node` and the closest source is `dist` and
+   * is restricted to be less than or equal to `explorationLimit()`. This
+   * predicate completely disregards sink definitions.
+   *
+   * This predicate is intended for data-flow exploration and debugging and may
+   * perform poorly if the number of sources is too big and/or the exploration
+   * limit is set too high without using barriers.
+   *
+   * This predicate is disabled (has no results) by default. Override
+   * `explorationLimit()` with a suitable number to enable this predicate.
+   *
+   * To use this in a `path-problem` query, import the module `PartialPathGraph`.
+   */
+  final predicate hasPartialFlow(PartialPathNode source, PartialPathNode node, int dist) {
+    partialFlow(source, node, this) and
+    dist = node.getSourceDistance()
+  }
+
+  /**
+   * Holds if there is a partial data flow path from `node` to `sink`. The
+   * approximate distance between `node` and the closest sink is `dist` and
+   * is restricted to be less than or equal to `explorationLimit()`. This
+   * predicate completely disregards source definitions.
+   *
+   * This predicate is intended for data-flow exploration and debugging and may
+   * perform poorly if the number of sinks is too big and/or the exploration
+   * limit is set too high without using barriers.
+   *
+   * This predicate is disabled (has no results) by default. Override
+   * `explorationLimit()` with a suitable number to enable this predicate.
+   *
+   * To use this in a `path-problem` query, import the module `PartialPathGraph`.
+   *
+   * Note that reverse flow has slightly lower precision than the corresponding
+   * forward flow, as reverse flow disregards type pruning among other features.
+   */
+  final predicate hasPartialFlowRev(PartialPathNode node, PartialPathNode sink, int dist) {
+    revPartialFlow(node, sink, this) and
+    dist = node.getSinkDistance()
+  }
+}
+
+/**
+ * This class exists to prevent mutual recursion between the user-overridden
+ * member predicates of `Configuration` and the rest of the data-flow library.
+ * Good performance cannot be guaranteed in the presence of such recursion, so
+ * it should be replaced by using more than one copy of the data flow library.
+ */
+abstract private class ConfigurationRecursionPrevention extends Configuration {
+  bindingset[this]
+  ConfigurationRecursionPrevention() { any() }
+
+  /**
+   * Holds if `super.hasFlow(source, sink)` holds.
+   *
+   * The three `strictcount(...) < 0` disjuncts can never hold, because a
+   * `strictcount` that has a value is at least 1. NOTE(review): presumably
+   * they exist only so that `hasFlow` syntactically depends on `isSource`,
+   * `isSink` and `isAdditionalFlowStep` - confirm against the class comment.
+   */
+  override predicate hasFlow(Node source, Node sink) {
+    strictcount(Node n | this.isSource(n)) < 0
+    or
+    strictcount(Node n | this.isSink(n)) < 0
+    or
+    strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0
+    or
+    super.hasFlow(source, sink)
+  }
+}
+
+/** Holds if `node` is both an in-barrier and a source of `config`. */
+private predicate inBarrier(Node node, Configuration config) {
+  config.isBarrierIn(node) and
+  config.isSource(node)
+}
+
+/** Holds if `node` is both an out-barrier and a sink of `config`. */
+private predicate outBarrier(Node node, Configuration config) {
+  config.isBarrierOut(node) and
+  config.isSink(node)
+}
+
+/**
+ * Holds if `node` is a barrier of `config`, an in-barrier that is not a
+ * source, an out-barrier that is not a sink, or a node guarded by one of the
+ * barrier guards of `config`.
+ */
+private predicate fullBarrier(Node node, Configuration config) {
+  config.isBarrier(node)
+  or
+  config.isBarrierIn(node) and
+  not config.isSource(node)
+  or
+  config.isBarrierOut(node) and
+  not config.isSink(node)
+  or
+  exists(BarrierGuard g |
+    config.isBarrierGuard(g) and
+    node = g.getAGuardedNode()
+  )
+}
+
+/**
+ * Holds if data can flow in one local step from `node1` to `node2`.
+ */
+private predicate localFlowStep(Node node1, Node node2, Configuration config) {
+  simpleLocalFlowStepExt(node1, node2) and
+  not outBarrier(node1, config) and
+  not inBarrier(node2, config) and
+  not fullBarrier(node1, config) and
+  not fullBarrier(node2, config)
+}
+
+/**
+ * Holds if the additional step from `node1` to `node2` does not jump between callables.
+ */
+private predicate additionalLocalFlowStep(Node node1, Node node2, Configuration config) {
+  config.isAdditionalFlowStep(node1, node2) and
+  getNodeEnclosingCallable(node1) = getNodeEnclosingCallable(node2) and
+  not outBarrier(node1, config) and
+  not inBarrier(node2, config) and
+  not fullBarrier(node1, config) and
+  not fullBarrier(node2, config)
+}
+
+/**
+ * Holds if data can flow from `node1` to `node2` in a way that discards call contexts.
+ */
+private predicate jumpStep(Node node1, Node node2, Configuration config) {
+  jumpStepCached(node1, node2) and
+  not outBarrier(node1, config) and
+  not inBarrier(node2, config) and
+  not fullBarrier(node1, config) and
+  not fullBarrier(node2, config)
+}
+
+/**
+ * Holds if the additional step from `node1` to `node2` jumps between callables.
+ */
+private predicate additionalJumpStep(Node node1, Node node2, Configuration config) {
+  config.isAdditionalFlowStep(node1, node2) and
+  getNodeEnclosingCallable(node1) != getNodeEnclosingCallable(node2) and
+  not outBarrier(node1, config) and
+  not inBarrier(node2, config) and
+  not fullBarrier(node1, config) and
+  not fullBarrier(node2, config)
+}
+
+/**
+ * Holds if field flow should be used for the given configuration.
+ */
+private predicate useFieldFlow(Configuration config) { config.fieldFlowBranchLimit() >= 1 }
+
+private module Stage1 {
+  // This stage uses `Unit` for all three access-path types and a plain
+  // Boolean for the call context.
+  class ApApprox = Unit;
+
+  class Ap = Unit;
+
+  class ApOption = Unit;
+
+  class Cc = boolean;
+
+  /* Begin: Stage 1 logic. */
+  /**
+   * Holds if `node` is reachable from a source in the configuration `config`.
+   *
+   * The Boolean `cc` records whether the node is reached through an
+   * argument in a call.
+   */
+  predicate fwdFlow(Node node, Cc cc, Configuration config) {
+    not fullBarrier(node, config) and
+    (
+      config.isSource(node) and
+      cc = false
+      or
+      exists(Node mid |
+        fwdFlow(mid, cc, config) and
+        localFlowStep(mid, node, config)
+      )
+      or
+      exists(Node mid |
+        fwdFlow(mid, cc, config) and
+        additionalLocalFlowStep(mid, node, config)
+      )
+      or
+      exists(Node mid |
+        fwdFlow(mid, _, config) and
+        jumpStep(mid, node, config) and
+        cc = false
+      )
+      or
+      exists(Node mid |
+        fwdFlow(mid, _, config) and
+        additionalJumpStep(mid, node, config) and
+        cc = false
+      )
+      or
+      // store
+      exists(Node mid |
+        useFieldFlow(config) and
+        fwdFlow(mid, cc, config) and
+        store(mid, _, node, _) and
+        not outBarrier(mid, config)
+      )
+      or
+      // read
+      exists(Content c |
+        fwdFlowRead(c, node, cc, config) and
+        fwdFlowConsCand(c, config) and
+        not inBarrier(node, config)
+      )
+      or
+      // flow into a callable
+      exists(Node arg |
+        fwdFlow(arg, _, config) and
+        viableParamArg(_, node, arg) and
+        cc = true
+      )
+      or
+      // flow out of a callable
+      exists(DataFlowCall call |
+        fwdFlowOut(call, node, false, config) and
+        cc = false
+        or
+        fwdFlowOutFromArg(call, node, config) and
+        fwdFlowIsEntered(call, cc, config)
+      )
+    )
+  }
+
+  /** Holds if `fwdFlow(node, cc, config)` holds for some call context `cc`. */
+  private predicate fwdFlow(Node node, Configuration config) { fwdFlow(node, _, config) }
+
+  /**
+   * Holds if `node` is the target of a read step of content `c` from some
+   * node that is reached by `fwdFlow` with call context `cc`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowRead(Content c, Node node, Cc cc, Configuration config) {
+    exists(Node mid |
+      fwdFlow(mid, cc, config) and
+      read(mid, c, node)
+    )
+  }
+
+  /**
+   * Holds if `c` is the target of a store in the flow covered by `fwdFlow`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowConsCand(Content c, Configuration config) {
+    exists(Node mid, Node node, TypedContent tc |
+      not fullBarrier(node, config) and
+      useFieldFlow(config) and
+      fwdFlow(mid, _, config) and
+      store(mid, tc, node, _) and
+      c = tc.getContent()
+    )
+  }
+
+  /**
+   * Holds if some return node with return position `pos` is reached by
+   * `fwdFlow` with call context `cc`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowReturnPosition(ReturnPosition pos, Cc cc, Configuration config) {
+    exists(ReturnNodeExt ret |
+      fwdFlow(ret, cc, config) and
+      getReturnPosition(ret) = pos
+    )
+  }
+
+  /**
+   * Holds if `out` is a viable out node of `call` for some return position
+   * that is reached by `fwdFlow` with call context `cc`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowOut(DataFlowCall call, Node out, Cc cc, Configuration config) {
+    exists(ReturnPosition pos |
+      fwdFlowReturnPosition(pos, cc, config) and
+      viableReturnPosOut(call, pos, out)
+    )
+  }
+
+  /** Holds if `fwdFlowOut(call, out, true, config)` holds, that is, with call context `true`. */
+  pragma[nomagic]
+  private predicate fwdFlowOutFromArg(DataFlowCall call, Node out, Configuration config) {
+    fwdFlowOut(call, out, true, config)
+  }
+
+  /**
+   * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowIsEntered(DataFlowCall call, Cc cc, Configuration config) {
+    exists(ArgNode arg |
+      fwdFlow(arg, cc, config) and
+      viableParamArg(call, _, arg)
+    )
+  }
+
+  /**
+   * Holds if `node` is part of a path from a source to a sink in the
+   * configuration `config`.
+   *
+   * The Boolean `toReturn` records whether the node must be returned from
+   * the enclosing callable in order to reach a sink.
+   */
+  pragma[nomagic]
+  predicate revFlow(Node node, boolean toReturn, Configuration config) {
+    revFlow0(node, toReturn, config) and
+    fwdFlow(node, config)
+  }
+
+  /**
+   * Holds if `node` satisfies one of the reverse-flow cases below (sink,
+   * local step, jump step, store, read, flow into or out of a callable).
+   * `revFlow` additionally requires `fwdFlow(node, config)`.
+   */
+  pragma[nomagic]
+  private predicate revFlow0(Node node, boolean toReturn, Configuration config) {
+    fwdFlow(node, config) and
+    config.isSink(node) and
+    toReturn = false
+    or
+    exists(Node mid |
+      localFlowStep(node, mid, config) and
+      revFlow(mid, toReturn, config)
+    )
+    or
+    exists(Node mid |
+      additionalLocalFlowStep(node, mid, config) and
+      revFlow(mid, toReturn, config)
+    )
+    or
+    exists(Node mid |
+      jumpStep(node, mid, config) and
+      revFlow(mid, _, config) and
+      toReturn = false
+    )
+    or
+    exists(Node mid |
+      additionalJumpStep(node, mid, config) and
+      revFlow(mid, _, config) and
+      toReturn = false
+    )
+    or
+    // store
+    exists(Content c |
+      revFlowStore(c, node, toReturn, config) and
+      revFlowConsCand(c, config)
+    )
+    or
+    // read
+    exists(Node mid, Content c |
+      read(node, c, mid) and
+      fwdFlowConsCand(c, pragma[only_bind_into](config)) and
+      revFlow(mid, toReturn, pragma[only_bind_into](config))
+    )
+    or
+    // flow into a callable
+    exists(DataFlowCall call |
+      revFlowIn(call, node, false, config) and
+      toReturn = false
+      or
+      revFlowInToReturn(call, node, config) and
+      revFlowIsReturned(call, toReturn, config)
+    )
+    or
+    // flow out of a callable
+    exists(ReturnPosition pos |
+      revFlowOut(pos, config) and
+      getReturnPosition(node) = pos and
+      toReturn = true
+    )
+  }
+
+  /**
+   * Holds if `c` is the target of a read in the flow covered by `revFlow`.
+   */
+  pragma[nomagic]
+  private predicate revFlowConsCand(Content c, Configuration config) {
+    exists(Node mid, Node node |
+      fwdFlow(node, pragma[only_bind_into](config)) and
+      read(node, c, mid) and
+      fwdFlowConsCand(c, pragma[only_bind_into](config)) and
+      revFlow(pragma[only_bind_into](mid), _, pragma[only_bind_into](config))
+    )
+  }
+
+  /**
+   * Holds if `node` is the source of a store step of content `c` whose target
+   * is in `revFlow` with `toReturn`, and `c` is a forward cons-candidate.
+   */
+  pragma[nomagic]
+  private predicate revFlowStore(Content c, Node node, boolean toReturn, Configuration config) {
+    exists(Node mid, TypedContent tc |
+      revFlow(mid, toReturn, pragma[only_bind_into](config)) and
+      fwdFlowConsCand(c, pragma[only_bind_into](config)) and
+      store(node, tc, mid, _) and
+      c = tc.getContent()
+    )
+  }
+
+  /**
+   * Holds if `c` is the target of both a read and a store in the flow covered
+   * by `revFlow`.
+   */
+  private predicate revFlowIsReadAndStored(Content c, Configuration conf) {
+    revFlowConsCand(c, conf) and
+    revFlowStore(c, _, _, conf)
+  }
+
+  /**
+   * Holds if `out` is a viable out node of `call` for return position `pos`,
+   * and `pos` is reached by `fwdFlow`.
+   */
+  pragma[nomagic]
+  predicate viableReturnPosOutNodeCandFwd1(
+    DataFlowCall call, ReturnPosition pos, Node out, Configuration config
+  ) {
+    fwdFlowReturnPosition(pos, _, config) and
+    viableReturnPosOut(call, pos, out)
+  }
+
+  /**
+   * Holds if some out node related to `pos` by
+   * `viableReturnPosOutNodeCandFwd1` is in `revFlow`.
+   */
+  pragma[nomagic]
+  private predicate revFlowOut(ReturnPosition pos, Configuration config) {
+    exists(DataFlowCall call, Node out |
+      revFlow(out, _, config) and
+      viableReturnPosOutNodeCandFwd1(call, pos, out, config)
+    )
+  }
+
+  /**
+   * Holds if `arg` is a viable argument for parameter `p` in `call`, and
+   * `arg` is reached by `fwdFlow`.
+   */
+  pragma[nomagic]
+  predicate viableParamArgNodeCandFwd1(
+    DataFlowCall call, ParamNode p, ArgNode arg, Configuration config
+  ) {
+    viableParamArg(call, p, arg) and
+    fwdFlow(arg, config)
+  }
+
+  /**
+   * Holds if `arg` is related by `viableParamArgNodeCandFwd1` to some
+   * parameter of `call` that is in `revFlow` with `toReturn`.
+   */
+  pragma[nomagic]
+  private predicate revFlowIn(DataFlowCall call, ArgNode arg, boolean toReturn, Configuration config) {
+    exists(ParamNode p |
+      revFlow(p, toReturn, config) and
+      viableParamArgNodeCandFwd1(call, p, arg, config)
+    )
+  }
+
+  /** Holds if `revFlowIn(call, arg, true, config)` holds, that is, with `toReturn = true`. */
+  pragma[nomagic]
+  private predicate revFlowInToReturn(DataFlowCall call, ArgNode arg, Configuration config) {
+    revFlowIn(call, arg, true, config)
+  }
+
+  /**
+   * Holds if an output from `call` is reached in the flow covered by `revFlow`.
+   */
+  pragma[nomagic]
+  private predicate revFlowIsReturned(DataFlowCall call, boolean toReturn, Configuration config) {
+    exists(Node out |
+      revFlow(out, toReturn, config) and
+      fwdFlowOutFromArg(call, out, config)
+    )
+  }
+
+  /**
+   * Holds if there is a store step from `node1` to `node2` of content `tc`
+   * with content type `contentType`, where `node2` is in `revFlow` and the
+   * content of `tc` is both read and stored in `revFlow`. The access path
+   * `ap1` is only required to exist.
+   */
+  pragma[nomagic]
+  predicate storeStepCand(
+    Node node1, Ap ap1, TypedContent tc, Node node2, DataFlowType contentType, Configuration config
+  ) {
+    exists(Content c |
+      revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and
+      revFlow(node2, pragma[only_bind_into](config)) and
+      store(node1, tc, node2, contentType) and
+      c = tc.getContent() and
+      exists(ap1)
+    )
+  }
+
+  /**
+   * Holds if there is a read step of content `c` from `n1` to `n2`, where
+   * `n2` is in `revFlow` and `c` is both read and stored in `revFlow`.
+   */
+  pragma[nomagic]
+  predicate readStepCand(Node n1, Content c, Node n2, Configuration config) {
+    revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and
+    revFlow(n2, pragma[only_bind_into](config)) and
+    read(n1, c, n2)
+  }
+
+  /** Holds if `revFlow(node, toReturn, config)` holds for some `toReturn`. */
+  pragma[nomagic]
+  predicate revFlow(Node node, Configuration config) { revFlow(node, _, config) }
+
+  /**
+   * Holds if `revFlow(node, toReturn, config)` holds; `returnAp` and `ap` are
+   * only required to exist.
+   */
+  predicate revFlow(Node node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) {
+    revFlow(node, toReturn, config) and exists(returnAp) and exists(ap)
+  }
+
+  /**
+   * Holds if `node` is in `revFlow` with `toReturn = true` and in `fwdFlow`
+   * with call context `true`, and is neither an in-barrier nor an out-barrier.
+   */
+  private predicate throughFlowNodeCand(Node node, Configuration config) {
+    revFlow(node, true, config) and
+    fwdFlow(node, true, config) and
+    not inBarrier(node, config) and
+    not outBarrier(node, config)
+  }
+
+  /** Holds if flow may return from `callable`. */
+  pragma[nomagic]
+  private predicate returnFlowCallableNodeCand(
+    DataFlowCallable callable, ReturnKindExt kind, Configuration config
+  ) {
+    exists(ReturnNodeExt ret |
+      throughFlowNodeCand(ret, config) and
+      callable = getNodeEnclosingCallable(ret) and
+      kind = ret.getKind()
+    )
+  }
+
+  /**
+   * Holds if flow may enter through `p` and reach a return node making `p` a
+   * candidate for the origin of a summary.
+   */
+  predicate parameterMayFlowThrough(ParamNode p, DataFlowCallable c, Ap ap, Configuration config) {
+    exists(ReturnKindExt kind |
+      throughFlowNodeCand(p, config) and
+      returnFlowCallableNodeCand(c, kind, config) and
+      getNodeEnclosingCallable(p) = c and
+      exists(ap) and
+      // we don't expect a parameter to return stored in itself
+      not exists(int pos |
+        kind.(ParamUpdateReturnKind).getPosition() = pos and p.isParameterOf(_, pos)
+      )
+    )
+  }
+
+  /**
+   * Gets size statistics for this stage: for forward flow (`fwd = true`) and
+   * reverse flow (`fwd = false`), the number of distinct nodes, contents
+   * (`fields`) and (node, Boolean) tuples. `conscand` is always `-1` here.
+   */
+  predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) {
+    fwd = true and
+    nodes = count(Node node | fwdFlow(node, config)) and
+    fields = count(Content f0 | fwdFlowConsCand(f0, config)) and
+    conscand = -1 and
+    tuples = count(Node n, boolean b | fwdFlow(n, b, config))
+    or
+    fwd = false and
+    nodes = count(Node node | revFlow(node, _, config)) and
+    fields = count(Content f0 | revFlowConsCand(f0, config)) and
+    conscand = -1 and
+    tuples = count(Node n, boolean b | revFlow(n, b, config))
+  }
+  /* End: Stage 1 logic. */
+}
+
+/** Holds if `localFlowStep(node1, node2, config)` holds and `node2` is in `Stage1::revFlow`. */
+pragma[noinline]
+private predicate localFlowStepNodeCand1(Node node1, Node node2, Configuration config) {
+  Stage1::revFlow(node2, config) and
+  localFlowStep(node1, node2, config)
+}
+
+/**
+ * Holds if `additionalLocalFlowStep(node1, node2, config)` holds and `node2`
+ * is in `Stage1::revFlow`.
+ */
+pragma[noinline]
+private predicate additionalLocalFlowStepNodeCand1(Node node1, Node node2, Configuration config) {
+  Stage1::revFlow(node2, config) and
+  additionalLocalFlowStep(node1, node2, config)
+}
+
+/**
+ * Holds if `Stage1::viableReturnPosOutNodeCandFwd1(call, pos, out, config)`
+ * holds and `out` is in `Stage1::revFlow`.
+ */
+pragma[nomagic]
+private predicate viableReturnPosOutNodeCand1(
+  DataFlowCall call, ReturnPosition pos, Node out, Configuration config
+) {
+  Stage1::revFlow(out, config) and
+  Stage1::viableReturnPosOutNodeCandFwd1(call, pos, out, config)
+}
+
+/**
+ * Holds if data can flow out of `call` from `ret` to `out`, either
+ * through a `ReturnNode` or through an argument that has been mutated, and
+ * that this step is part of a path from a source to a sink.
+ */
+pragma[nomagic]
+private predicate flowOutOfCallNodeCand1(
+  DataFlowCall call, ReturnNodeExt ret, Node out, Configuration config
+) {
+  viableReturnPosOutNodeCand1(call, getReturnPosition(ret), out, config) and
+  Stage1::revFlow(ret, config) and
+  not outBarrier(ret, config) and
+  not inBarrier(out, config)
+}
+
+/**
+ * Holds if `Stage1::viableParamArgNodeCandFwd1(call, p, arg, config)` holds
+ * and `arg` is in `Stage1::revFlow`.
+ */
+pragma[nomagic]
+private predicate viableParamArgNodeCand1(
+  DataFlowCall call, ParamNode p, ArgNode arg, Configuration config
+) {
+  Stage1::viableParamArgNodeCandFwd1(call, p, arg, config) and
+  Stage1::revFlow(arg, config)
+}
+
+/**
+ * Holds if data can flow into `call` and that this step is part of a
+ * path from a source to a sink.
+ */
+pragma[nomagic]
+private predicate flowIntoCallNodeCand1(
+  DataFlowCall call, ArgNode arg, ParamNode p, Configuration config
+) {
+  viableParamArgNodeCand1(call, p, arg, config) and
+  Stage1::revFlow(p, config) and
+  not outBarrier(arg, config) and
+  not inBarrier(p, config)
+}
+
+/**
+ * Gets the amount of forward branching on the origin of a cross-call path
+ * edge in the graph of paths between sources and sinks that ignores call
+ * contexts.
+ */
+private int branch(Node n1, Configuration conf) {
+  result =
+    strictcount(Node n |
+      flowOutOfCallNodeCand1(_, n1, n, conf) or flowIntoCallNodeCand1(_, n1, n, conf)
+    )
+}
+
+/**
+ * Gets the amount of backward branching on the target of a cross-call path
+ * edge in the graph of paths between sources and sinks that ignores call
+ * contexts.
+ */
+private int join(Node n2, Configuration conf) {
+  result =
+    strictcount(Node n |
+      flowOutOfCallNodeCand1(_, n, n2, conf) or flowIntoCallNodeCand1(_, n, n2, conf)
+    )
+}
+
+/**
+ * Holds if data can flow out of `call` from `ret` to `out`, either
+ * through a `ReturnNode` or through an argument that has been mutated, and
+ * that this step is part of a path from a source to a sink. The
+ * `allowsFieldFlow` flag indicates whether the branching is within the limit
+ * specified by the configuration.
+ */
+pragma[nomagic]
+private predicate flowOutOfCallNodeCand1(
+  DataFlowCall call, ReturnNodeExt ret, Node out, boolean allowsFieldFlow, Configuration config
+) {
+  flowOutOfCallNodeCand1(call, ret, out, config) and
+  exists(int b, int j |
+    b = branch(ret, config) and
+    j = join(out, config) and
+    // field flow is allowed when the smaller of the two branching factors is within the limit
+    if b.minimum(j) <= config.fieldFlowBranchLimit()
+    then allowsFieldFlow = true
+    else allowsFieldFlow = false
+  )
+}
+
+/**
+ * Holds if data can flow into `call` and that this step is part of a
+ * path from a source to a sink. The `allowsFieldFlow` flag indicates whether
+ * the branching is within the limit specified by the configuration.
+ */
+pragma[nomagic]
+private predicate flowIntoCallNodeCand1(
+  DataFlowCall call, ArgNode arg, ParamNode p, boolean allowsFieldFlow, Configuration config
+) {
+  flowIntoCallNodeCand1(call, arg, p, config) and
+  exists(int b, int j |
+    b = branch(arg, config) and
+    j = join(p, config) and
+    // field flow is allowed when the smaller of the two branching factors is within the limit
+    if b.minimum(j) <= config.fieldFlowBranchLimit()
+    then allowsFieldFlow = true
+    else allowsFieldFlow = false
+  )
+}
+
+private module Stage2 {
+  module PrevStage = Stage1;
+
+  class ApApprox = PrevStage::Ap;
+
+  // Access paths in this stage are Booleans; `false` is the nil access path
+  // (see `ApNil`) and `true` is what `apCons` produces.
+  class Ap = boolean;
+
+  class ApNil extends Ap {
+    ApNil() { this = false }
+  }
+
+  bindingset[result, ap]
+  private ApApprox getApprox(Ap ap) { any() }
+
+  private ApNil getApNil(Node node) { PrevStage::revFlow(node, _) and exists(result) }
+
+  bindingset[tc, tail]
+  private Ap apCons(TypedContent tc, Ap tail) { result = true and exists(tc) and exists(tail) }
+
+  pragma[inline]
+  private Content getHeadContent(Ap ap) { exists(result) and ap = true }
+
+  class ApOption = BooleanOption;
+
+  ApOption apNone() { result = TBooleanNone() }
+
+  ApOption apSome(Ap ap) { result = TBooleanSome(ap) }
+
+  // Call contexts in this stage are Booleans: `true` is `CcCall`, `false` is `CcNoCall`.
+  class Cc = boolean;
+
+  class CcCall extends Cc {
+    CcCall() { this = true }
+
+    /** Holds if this call context may be `call`. */
+    predicate matchesCall(DataFlowCall call) { any() }
+  }
+
+  class CcNoCall extends Cc {
+    CcNoCall() { this = false }
+  }
+
+  Cc ccNone() { result = false }
+
+  private class LocalCc = Unit;
+
+  bindingset[call, c, outercc]
+  private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() }
+
+  bindingset[call, c]
+  private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) { any() }
+
+  bindingset[innercc, inner, call]
+  private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) {
+    any()
+  }
+
+  bindingset[node, cc, config]
+  private LocalCc getLocalCc(Node node, Cc cc, Configuration config) { any() }
+
+  /**
+   * Holds if there is a stage 1 candidate local step from `node1` to `node2`:
+   * a value-preserving one when `preservesValue = true`, an additional one
+   * when `preservesValue = false`. `ap` and `lcc` are only required to exist.
+   */
+  private predicate localStep(
+    Node node1, Node node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc
+  ) {
+    (
+      preservesValue = true and
+      localFlowStepNodeCand1(node1, node2, config)
+      or
+      preservesValue = false and
+      additionalLocalFlowStepNodeCand1(node1, node2, config)
+    ) and
+    exists(ap) and
+    exists(lcc)
+  }
+
+  private predicate flowOutOfCall = flowOutOfCallNodeCand1/5;
+
+  private predicate flowIntoCall = flowIntoCallNodeCand1/5;
+
+  bindingset[ap, contentType]
+  private predicate typecheckStore(Ap ap, DataFlowType contentType) { any() }
+
+  /* Begin: Stage 2 logic. */
+  /** Holds if `node` is in the previous stage's `revFlow` with access path approximation `apa`. */
+  private predicate flowCand(Node node, ApApprox apa, Configuration config) {
+    PrevStage::revFlow(node, _, _, apa, config)
+  }
+
+  /**
+   * Holds if `node` is reachable with access path `ap` from a source in the
+   * configuration `config`.
+   *
+   * The call context `cc` records whether the node is reached through an
+   * argument in a call, and if so, `argAp` records the access path of that
+   * argument.
+   */
+  pragma[nomagic]
+  predicate fwdFlow(Node node, Cc cc, ApOption argAp, Ap ap, Configuration config) {
+    flowCand(node, _, config) and
+    config.isSource(node) and
+    cc = ccNone() and
+    argAp = apNone() and
+    ap = getApNil(node)
+    or
+    exists(Node mid, Ap ap0, LocalCc localCc |
+      fwdFlow(mid, cc, argAp, ap0, config) and
+      localCc = getLocalCc(mid, cc, config)
+    |
+      localStep(mid, node, true, _, config, localCc) and
+      ap = ap0
+      or
+      localStep(mid, node, false, ap, config, localCc) and
+      ap0 instanceof ApNil
+    )
+    or
+    exists(Node mid |
+      fwdFlow(mid, _, _, ap, pragma[only_bind_into](config)) and
+      flowCand(node, _, pragma[only_bind_into](config)) and
+      jumpStep(mid, node, config) and
+      cc = ccNone() and
+      argAp = apNone()
+    )
+    or
+    exists(Node mid, ApNil nil |
+      fwdFlow(mid, _, _, nil, pragma[only_bind_into](config)) and
+      flowCand(node, _, pragma[only_bind_into](config)) and
+      additionalJumpStep(mid, node, config) and
+      cc = ccNone() and
+      argAp = apNone() and
+      ap = getApNil(node)
+    )
+    or
+    // store
+    exists(TypedContent tc, Ap ap0 |
+      fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and
+      ap = apCons(tc, ap0)
+    )
+    or
+    // read
+    exists(Ap ap0, Content c |
+      fwdFlowRead(ap0, c, _, node, cc, argAp, config) and
+      fwdFlowConsCand(ap0, c, ap, config)
+    )
+    or
+    // flow into a callable
+    exists(ApApprox apa |
+      fwdFlowIn(_, node, _, cc, _, ap, config) and
+      apa = getApprox(ap) and
+      if PrevStage::parameterMayFlowThrough(node, _, apa, config)
+      then argAp = apSome(ap)
+      else argAp = apNone()
+    )
+    or
+    // flow out of a callable
+    exists(DataFlowCall call |
+      fwdFlowOut(call, node, any(CcNoCall innercc), cc, argAp, ap, config)
+      or
+      exists(Ap argAp0 |
+        fwdFlowOutFromArg(call, node, argAp0, ap, config) and
+        fwdFlowIsEntered(call, cc, argAp, argAp0, config)
+      )
+    )
+  }
+
+  /**
+   * Holds if `node1` is reached by `fwdFlow` with access path `ap1` (and `cc`,
+   * `argAp`), and the previous stage has a store step candidate of `tc` from
+   * `node1` to `node2` whose content type passes `typecheckStore`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowStore(
+    Node node1, Ap ap1, TypedContent tc, Node node2, Cc cc, ApOption argAp, Configuration config
+  ) {
+    exists(DataFlowType contentType |
+      fwdFlow(node1, cc, argAp, ap1, config) and
+      PrevStage::storeStepCand(node1, getApprox(ap1), tc, node2, contentType, config) and
+      typecheckStore(ap1, contentType)
+    )
+  }
+
+  /**
+   * Holds if forward flow with access path `tail` reaches a store of `c`
+   * resulting in access path `cons`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+    exists(TypedContent tc |
+      fwdFlowStore(_, tail, tc, _, _, _, config) and
+      tc.getContent() = c and
+      cons = apCons(tc, tail)
+    )
+  }
+
+  /**
+   * Holds if `node1` is reached by `fwdFlow` with access path `ap` (and `cc`,
+   * `argAp`), the previous stage has a read step candidate of `c` from `node1`
+   * to `node2`, and `c` is the head content of `ap`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowRead(
+    Ap ap, Content c, Node node1, Node node2, Cc cc, ApOption argAp, Configuration config
+  ) {
+    fwdFlow(node1, cc, argAp, ap, config) and
+    PrevStage::readStepCand(node1, c, node2, config) and
+    getHeadContent(ap) = c
+  }
+
+  /**
+   * Holds if some argument of `call` is reached by `fwdFlow` with `outercc`,
+   * `argAp` and `ap`, and flows into parameter `p` with inner call context
+   * `innercc`. A non-nil `ap` additionally requires the call edge to allow
+   * field flow.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowIn(
+    DataFlowCall call, ParamNode p, Cc outercc, Cc innercc, ApOption argAp, Ap ap,
+    Configuration config
+  ) {
+    exists(ArgNode arg, boolean allowsFieldFlow |
+      fwdFlow(arg, outercc, argAp, ap, config) and
+      flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+      innercc = getCallContextCall(call, getNodeEnclosingCallable(p), outercc)
+    |
+      ap instanceof ApNil or allowsFieldFlow = true
+    )
+  }
+
+  /**
+   * Holds if flow may exit from `call` at `out` with access path `ap`. The
+   * inner call context is `innercc`, but `ccOut` is just the call context
+   * based on the return step. In the case of through-flow `ccOut` is discarded
+   * and replaced by the outer call context as tracked by `fwdFlowIsEntered`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowOut(
+    DataFlowCall call, Node out, Cc innercc, Cc ccOut, ApOption argAp, Ap ap, Configuration config
+  ) {
+    exists(ReturnNodeExt ret, boolean allowsFieldFlow, DataFlowCallable inner |
+      fwdFlow(ret, innercc, argAp, ap, config) and
+      flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
+      inner = getNodeEnclosingCallable(ret) and
+      checkCallContextReturn(innercc, inner, call) and
+      ccOut = getCallContextReturn(inner, call)
+    |
+      ap instanceof ApNil or allowsFieldFlow = true
+    )
+  }
+
+  /**
+   * Holds if `fwdFlowOut` holds for `call` and `out` with a `CcCall` inner
+   * call context, argument access path `apSome(argAp)` and access path `ap`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowOutFromArg(
+    DataFlowCall call, Node out, Ap argAp, Ap ap, Configuration config
+  ) {
+    fwdFlowOut(call, out, any(CcCall ccc), _, apSome(argAp), ap, config)
+  }
+
+  /**
+   * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`
+   * and data might flow through the target callable and back out at `call`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowIsEntered(
+    DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config
+  ) {
+    exists(ParamNode p |
+      fwdFlowIn(call, p, cc, _, argAp, ap, config) and
+      PrevStage::parameterMayFlowThrough(p, _, getApprox(ap), config)
+    )
+  }
+
+  /**
+   * Holds if `fwdFlowStore` has a store of `tc` from `node1` (access path
+   * `ap1`) to `node2`, `ap2` is `apCons(tc, ap1)`, and `fwdFlowRead` has some
+   * read of the content of `tc` at access path `ap2`.
+   */
+  pragma[nomagic]
+  private predicate storeStepFwd(
+    Node node1, Ap ap1, TypedContent tc, Node node2, Ap ap2, Configuration config
+  ) {
+    fwdFlowStore(node1, ap1, tc, node2, _, _, config) and
+    ap2 = apCons(tc, ap1) and
+    fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config)
+  }
+
+  /**
+   * Holds if `fwdFlowRead` has a read of `c` from `n1` (access path `ap1`) to
+   * `n2`, and `fwdFlowConsCand(ap1, c, ap2, config)` holds.
+   */
+  private predicate readStepFwd(Node n1, Ap ap1, Content c, Node n2, Ap ap2, Configuration config) {
+    fwdFlowRead(ap1, c, n1, n2, _, _, config) and
+    fwdFlowConsCand(ap1, c, ap2, config)
+  }
+
+  /**
+   * Holds if `node` with access path `ap` is part of a path from a source to a
+   * sink in the configuration `config`.
+   *
+   * The Boolean `toReturn` records whether the node must be returned from the
+   * enclosing callable in order to reach a sink, and if so, `returnAp` records
+   * the access path of the returned value.
+   */
+  pragma[nomagic]
+  predicate revFlow(Node node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) {
+    revFlow0(node, toReturn, returnAp, ap, config) and
+    fwdFlow(node, _, _, ap, config)
+  }
+
+  /**
+   * Holds if `node` with access path `ap` satisfies one of the reverse-flow
+   * cases below (sink, local step, jump step, store, read, flow into or out
+   * of a callable). `revFlow` additionally requires `fwdFlow` for `node` and
+   * `ap`.
+   */
+  pragma[nomagic]
+  private predicate revFlow0(
+    Node node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+  ) {
+    fwdFlow(node, _, _, ap, config) and
+    config.isSink(node) and
+    toReturn = false and
+    returnAp = apNone() and
+    ap instanceof ApNil
+    or
+    exists(Node mid |
+      localStep(node, mid, true, _, config, _) and
+      revFlow(mid, toReturn, returnAp, ap, config)
+    )
+    or
+    exists(Node mid, ApNil nil |
+      fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+      localStep(node, mid, false, _, config, _) and
+      revFlow(mid, toReturn, returnAp, nil, pragma[only_bind_into](config)) and
+      ap instanceof ApNil
+    )
+    or
+    exists(Node mid |
+      jumpStep(node, mid, config) and
+      revFlow(mid, _, _, ap, config) and
+      toReturn = false and
+      returnAp = apNone()
+    )
+    or
+    exists(Node mid, ApNil nil |
+      fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and
+      additionalJumpStep(node, mid, config) and
+      revFlow(pragma[only_bind_into](mid), _, _, nil, pragma[only_bind_into](config)) and
+      toReturn = false and
+      returnAp = apNone() and
+      ap instanceof ApNil
+    )
+    or
+    // store
+    exists(Ap ap0, Content c |
+      revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and
+      revFlowConsCand(ap0, c, ap, config)
+    )
+    or
+    // read
+    exists(Node mid, Ap ap0 |
+      revFlow(mid, toReturn, returnAp, ap0, config) and
+      readStepFwd(node, ap, _, mid, ap0, config)
+    )
+    or
+    // flow into a callable
+    exists(DataFlowCall call |
+      revFlowIn(call, node, toReturn, returnAp, ap, config) and
+      toReturn = false
+      or
+      exists(Ap returnAp0 |
+        revFlowInToReturn(call, node, returnAp0, ap, config) and
+        revFlowIsReturned(call, toReturn, returnAp, returnAp0, config)
+      )
+    )
+    or
+    // flow out of a callable
+    revFlowOut(_, node, _, _, ap, config) and
+    toReturn = true and
+    if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config)
+    then returnAp = apSome(ap)
+    else returnAp = apNone()
+  }
+
+  /**
+   * Holds if `mid` is in `revFlow` with access path `ap0` (and `toReturn`,
+   * `returnAp`), and `storeStepFwd` has a store of `tc`, whose content is `c`,
+   * from `node` (access path `ap`) to `mid` (access path `ap0`).
+   */
+  pragma[nomagic]
+  private predicate revFlowStore(
+    Ap ap0, Content c, Ap ap, Node node, TypedContent tc, Node mid, boolean toReturn,
+    ApOption returnAp, Configuration config
+  ) {
+    revFlow(mid, toReturn, returnAp, ap0, config) and
+    storeStepFwd(node, ap, tc, mid, ap0, config) and
+    tc.getContent() = c
+  }
+
+  /**
+   * Holds if reverse flow with access path `tail` reaches a read of `c`
+   * resulting in access path `cons`.
+   */
+  pragma[nomagic]
+  private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+    exists(Node mid, Ap tail0 |
+      revFlow(mid, _, _, tail, config) and
+      tail = pragma[only_bind_into](tail0) and
+      readStepFwd(_, cons, c, mid, tail0, config)
+    )
+  }
+
+  /**
+   * Holds if `ret` flows out of `call` to some node that is in `revFlow` with
+   * `toReturn`, `returnAp` and `ap`. A non-nil `ap` additionally requires the
+   * call edge to allow field flow.
+   */
+  pragma[nomagic]
+  private predicate revFlowOut(
+    DataFlowCall call, ReturnNodeExt ret, boolean toReturn, ApOption returnAp, Ap ap,
+    Configuration config
+  ) {
+    exists(Node out, boolean allowsFieldFlow |
+      revFlow(out, toReturn, returnAp, ap, config) and
+      flowOutOfCall(call, ret, out, allowsFieldFlow, config)
+    |
+      ap instanceof ApNil or allowsFieldFlow = true
+    )
+  }
+
+  /**
+   * Holds if `arg` flows into `call` at some parameter that is in `revFlow`
+   * with `toReturn`, `returnAp` and `ap`. A non-nil `ap` additionally requires
+   * the call edge to allow field flow.
+   */
+  pragma[nomagic]
+  private predicate revFlowIn(
+    DataFlowCall call, ArgNode arg, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+  ) {
+    exists(ParamNode p, boolean allowsFieldFlow |
+      revFlow(p, toReturn, returnAp, ap, config) and
+      flowIntoCall(call, arg, p, allowsFieldFlow, config)
+    |
+      ap instanceof ApNil or allowsFieldFlow = true
+    )
+  }
+
+  /** Holds if `revFlowIn` holds for `call` and `arg` with `toReturn = true` and `apSome(returnAp)`. */
+  pragma[nomagic]
+  private predicate revFlowInToReturn(
+    DataFlowCall call, ArgNode arg, Ap returnAp, Ap ap, Configuration config
+  ) {
+    revFlowIn(call, arg, true, apSome(returnAp), ap, config)
+  }
+
+  /**
+   * Holds if an output from `call` is reached in the flow covered by `revFlow`
+   * and data might flow through the target callable resulting in reverse flow
+   * reaching an argument of `call`.
+   */
+  pragma[nomagic]
+  private predicate revFlowIsReturned(
+    DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config
+  ) {
+    exists(ReturnNodeExt ret, CcCall ccc |
+      revFlowOut(call, ret, toReturn, returnAp, ap, config) and
+      fwdFlow(ret, ccc, apSome(_), ap, config) and
+      ccc.matchesCall(call)
+    )
+  }
+
+  /**
+   * Holds if there is a store step of `tc` from `node1` (access path `ap1`)
+   * to `node2` with content type `contentType`, and that store is covered by
+   * both `revFlowStore` and `revFlowConsCand`.
+   */
+  pragma[nomagic]
+  predicate storeStepCand(
+    Node node1, Ap ap1, TypedContent tc, Node node2, DataFlowType contentType, Configuration config
+  ) {
+    exists(Ap ap2, Content c |
+      store(node1, tc, node2, contentType) and
+      revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and
+      revFlowConsCand(ap2, c, ap1, config)
+    )
+  }
+
+  /**
+   * Holds if `readStepFwd` has a read of `c` from `node1` to `node2`, `node2`
+   * is in `revFlow` with the resulting access path, and `revFlowStore` has a
+   * store of `c` for the same pair of access paths.
+   */
+  predicate readStepCand(Node node1, Content c, Node node2, Configuration config) {
+    exists(Ap ap1, Ap ap2 |
+      revFlow(node2, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and
+      readStepFwd(node1, ap1, c, node2, ap2, config) and
+      revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _,
+        pragma[only_bind_into](config))
+    )
+  }
+
+  /** Holds if `node` is in `revFlow` with some `toReturn`, `returnAp` and `ap`. */
+  predicate revFlow(Node node, Configuration config) { revFlow(node, _, _, _, config) }
+
+  /** Holds if `storeStepFwd` has a store of `tc` onto access path `ap`. */
+  private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) {
+    storeStepFwd(_, ap, tc, _, _, config)
+  }
+
+  /** Holds if `storeStepCand` has a store of `tc` onto access path `ap`. */
+  predicate consCand(TypedContent tc, Ap ap, Configuration config) {
+    storeStepCand(_, ap, tc, _, _, config)
+  }
+
+  /**
+   * Holds if parameter `p` of callable `c` is in `revFlow` with access path
+   * `ap`, `toReturn = true` and return access path `apSome(ap0)`.
+   */
+  pragma[noinline]
+  private predicate parameterFlow(
+    ParamNode p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config
+  ) {
+    revFlow(p, true, apSome(ap0), ap, config) and
+    c = getNodeEnclosingCallable(p)
+  }
+
+  /**
+   * Holds if `parameterFlow` relates `p` (access path `ap`) in callable `c`
+   * to a return access path at which some return node of `c` is in both
+   * `revFlow` and `fwdFlow`, excluding a return kind that updates the
+   * parameter position of `p` itself.
+   */
+  predicate parameterMayFlowThrough(ParamNode p, DataFlowCallable c, Ap ap, Configuration config) {
+    exists(ReturnNodeExt ret, Ap ap0, ReturnKindExt kind, int pos |
+      parameterFlow(p, ap, ap0, c, config) and
+      c = getNodeEnclosingCallable(ret) and
+      revFlow(ret, true, apSome(_), ap0, config) and
+      fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and
+      kind = ret.getKind() and
+      p.isParameterOf(_, pos) and
+      // we don't expect a parameter to return stored in itself
+      not kind.(ParamUpdateReturnKind).getPosition() = pos
+    )
+  }
+
+  /**
+   * Gets size statistics for this stage: for forward flow (`fwd = true`) and
+   * reverse flow (`fwd = false`), the number of distinct nodes, typed
+   * contents (`fields`), (typed content, access path) pairs (`conscand`) and
+   * flow tuples.
+   */
+  predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) {
+    fwd = true and
+    nodes = count(Node node | fwdFlow(node, _, _, _, config)) and
+    fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and
+    conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and
+    tuples = count(Node n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config))
+    or
+    fwd = false and
+    nodes = count(Node node | revFlow(node, _, _, _, config)) and
+    fields = count(TypedContent f0 | consCand(f0, _, config)) and
+    conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and
+    tuples = count(Node n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config))
+  }
+  /* End: Stage 2 logic. */
+}
+
+/**
+ * Holds if `flowOutOfCallNodeCand1(call, node1, node2, allowsFieldFlow, config)`
+ * holds and both `node1` and `node2` are in `Stage2::revFlow`.
+ */
+pragma[nomagic]
+private predicate flowOutOfCallNodeCand2(
+  DataFlowCall call, ReturnNodeExt node1, Node node2, boolean allowsFieldFlow, Configuration config
+) {
+  flowOutOfCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and
+  Stage2::revFlow(node2, pragma[only_bind_into](config)) and
+  Stage2::revFlow(node1, pragma[only_bind_into](config))
+}
+
+/**
+ * Holds if `flowIntoCallNodeCand1(call, node1, node2, allowsFieldFlow, config)`
+ * holds and both `node1` and `node2` are in `Stage2::revFlow`.
+ */
+pragma[nomagic]
+private predicate flowIntoCallNodeCand2(
+  DataFlowCall call, ArgNode node1, ParamNode node2, boolean allowsFieldFlow, Configuration config
+) {
+  flowIntoCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and
+  Stage2::revFlow(node2, pragma[only_bind_into](config)) and
+  Stage2::revFlow(node1, pragma[only_bind_into](config))
+}
+
+private module LocalFlowBigStep {
+  /**
+   * A node where some checking is required, and hence the big-step relation
+   * is not allowed to step over.
+ */ + private class FlowCheckNode extends Node { + FlowCheckNode() { + castNode(this) or + clearsContentCached(this, _) + } + } + + /** + * Holds if `node` can be the first node in a maximal subsequence of local + * flow steps in a dataflow path. + * + * That is, `node` is in `Stage2::revFlow` and is a source, the target of a + * (possibly additional) jump step, a parameter node, an out node, the target + * of a store or read step, or a `FlowCheckNode`. + */ + predicate localFlowEntry(Node node, Configuration config) { + Stage2::revFlow(node, config) and + ( + config.isSource(node) or + jumpStep(_, node, config) or + additionalJumpStep(_, node, config) or + node instanceof ParamNode or + node instanceof OutNodeExt or + store(_, _, node, _) or + read(_, _, node) or + node instanceof FlowCheckNode + ) + } + + /** + * Holds if `node` can be the last node in a maximal subsequence of local + * flow steps in a dataflow path. + * + * That is, `node` has a jump step, an additional jump step, a step into or + * out of a call, or a store or read step to some node in `Stage2::revFlow`, + * or `node` is a `FlowCheckNode` or a sink. + */ + private predicate localFlowExit(Node node, Configuration config) { + exists(Node next | Stage2::revFlow(next, config) | + jumpStep(node, next, config) or + additionalJumpStep(node, next, config) or + flowIntoCallNodeCand1(_, node, next, config) or + flowOutOfCallNodeCand1(_, node, next, config) or + store(node, _, next, _) or + read(node, _, next) + ) + or + node instanceof FlowCheckNode + or + config.isSink(node) + } + + pragma[noinline] + private predicate additionalLocalFlowStepNodeCand2(Node node1, Node node2, Configuration config) { + additionalLocalFlowStepNodeCand1(node1, node2, config) and + Stage2::revFlow(node1, _, _, false, pragma[only_bind_into](config)) and + Stage2::revFlow(node2, _, _, false, pragma[only_bind_into](config)) + } + + /** + * Holds if the local path from `node1` to `node2` is a prefix of a maximal + * subsequence of local flow steps in a dataflow path. + * + * This is the transitive closure of `[additional]localFlowStep` beginning + * at `localFlowEntry`. 
+ */ + pragma[nomagic] + private predicate localFlowStepPlus( + Node node1, Node node2, boolean preservesValue, DataFlowType t, Configuration config, + LocalCallContext cc + ) { + not isUnreachableInCallCached(node2, cc.(LocalCallContextSpecificCall).getCall()) and + ( + localFlowEntry(node1, pragma[only_bind_into](config)) and + ( + localFlowStepNodeCand1(node1, node2, config) and + preservesValue = true and + t = getNodeDataFlowType(node1) + or + additionalLocalFlowStepNodeCand2(node1, node2, config) and + preservesValue = false and + t = getNodeDataFlowType(node2) + ) and + node1 != node2 and + cc.relevantFor(getNodeEnclosingCallable(node1)) and + not isUnreachableInCallCached(node1, cc.(LocalCallContextSpecificCall).getCall()) and + Stage2::revFlow(node2, pragma[only_bind_into](config)) + or + exists(Node mid | + localFlowStepPlus(node1, mid, preservesValue, t, pragma[only_bind_into](config), cc) and + localFlowStepNodeCand1(mid, node2, config) and + not mid instanceof FlowCheckNode and + Stage2::revFlow(node2, pragma[only_bind_into](config)) + ) + or + exists(Node mid | + localFlowStepPlus(node1, mid, _, _, pragma[only_bind_into](config), cc) and + additionalLocalFlowStepNodeCand2(mid, node2, config) and + not mid instanceof FlowCheckNode and + preservesValue = false and + t = getNodeDataFlowType(node2) and + Stage2::revFlow(node2, pragma[only_bind_into](config)) + ) + ) + } + + /** + * Holds if `node1` can step to `node2` in one or more local steps and this + * path can occur as a maximal subsequence of local steps in a dataflow path. 
+ */ + pragma[nomagic] + predicate localFlowBigStep( + Node node1, Node node2, boolean preservesValue, AccessPathFrontNil apf, Configuration config, + LocalCallContext callContext + ) { + localFlowStepPlus(node1, node2, preservesValue, apf.getType(), config, callContext) and + localFlowExit(node2, config) + } +} + +private import LocalFlowBigStep + +private module Stage3 { + module PrevStage = Stage2; + + class ApApprox = PrevStage::Ap; + + class Ap = AccessPathFront; + + class ApNil = AccessPathFrontNil; + + private ApApprox getApprox(Ap ap) { result = ap.toBoolNonEmpty() } + + private ApNil getApNil(Node node) { + PrevStage::revFlow(node, _) and result = TFrontNil(getNodeDataFlowType(node)) + } + + bindingset[tc, tail] + private Ap apCons(TypedContent tc, Ap tail) { result.getHead() = tc and exists(tail) } + + pragma[noinline] + private Content getHeadContent(Ap ap) { result = ap.getHead().getContent() } + + class ApOption = AccessPathFrontOption; + + ApOption apNone() { result = TAccessPathFrontNone() } + + ApOption apSome(Ap ap) { result = TAccessPathFrontSome(ap) } + + class Cc = boolean; + + class CcCall extends Cc { + CcCall() { this = true } + + /** Holds if this call context may be `call`. 
*/ + predicate matchesCall(DataFlowCall call) { any() } + } + + class CcNoCall extends Cc { + CcNoCall() { this = false } + } + + Cc ccNone() { result = false } + + private class LocalCc = Unit; + + bindingset[call, c, outercc] + private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() } + + bindingset[call, c] + private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) { any() } + + bindingset[innercc, inner, call] + private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) { + any() + } + + bindingset[node, cc, config] + private LocalCc getLocalCc(Node node, Cc cc, Configuration config) { any() } + + private predicate localStep( + Node node1, Node node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc + ) { + localFlowBigStep(node1, node2, preservesValue, ap, config, _) and exists(lcc) + } + + private predicate flowOutOfCall = flowOutOfCallNodeCand2/5; + + private predicate flowIntoCall = flowIntoCallNodeCand2/5; + + bindingset[node, ap] + private predicate filter(Node node, Ap ap) { + not ap.isClearedAt(node) and + if node instanceof CastingNode + then compatibleTypes(getNodeDataFlowType(node), ap.getType()) + else any() + } + + bindingset[ap, contentType] + private predicate typecheckStore(Ap ap, DataFlowType contentType) { + // We need to typecheck stores here, since reverse flow through a getter + // might have a different type here compared to inside the getter. + compatibleTypes(ap.getType(), contentType) + } + + /* Begin: Stage 3 logic. 
*/ + private predicate flowCand(Node node, ApApprox apa, Configuration config) { + PrevStage::revFlow(node, _, _, apa, config) + } + + bindingset[result, apa] + private ApApprox unbindApa(ApApprox apa) { + exists(ApApprox apa0 | + apa = pragma[only_bind_into](apa0) and result = pragma[only_bind_into](apa0) + ) + } + + /** + * Holds if `node` is reachable with access path `ap` from a source in the + * configuration `config`. + * + * The call context `cc` records whether the node is reached through an + * argument in a call, and if so, `argAp` records the access path of that + * argument. + */ + pragma[nomagic] + predicate fwdFlow(Node node, Cc cc, ApOption argAp, Ap ap, Configuration config) { + fwdFlow0(node, cc, argAp, ap, config) and + flowCand(node, unbindApa(getApprox(ap)), config) and + filter(node, ap) + } + + pragma[nomagic] + private predicate fwdFlow0(Node node, Cc cc, ApOption argAp, Ap ap, Configuration config) { + flowCand(node, _, config) and + config.isSource(node) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + or + exists(Node mid, Ap ap0, LocalCc localCc | + fwdFlow(mid, cc, argAp, ap0, config) and + localCc = getLocalCc(mid, cc, config) + | + localStep(mid, node, true, _, config, localCc) and + ap = ap0 + or + localStep(mid, node, false, ap, config, localCc) and + ap0 instanceof ApNil + ) + or + exists(Node mid | + fwdFlow(mid, _, _, ap, pragma[only_bind_into](config)) and + flowCand(node, _, pragma[only_bind_into](config)) and + jumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() + ) + or + exists(Node mid, ApNil nil | + fwdFlow(mid, _, _, nil, pragma[only_bind_into](config)) and + flowCand(node, _, pragma[only_bind_into](config)) and + additionalJumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + ) + or + // store + exists(TypedContent tc, Ap ap0 | + fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and + ap = apCons(tc, ap0) + ) + or + // read + exists(Ap 
ap0, Content c | + fwdFlowRead(ap0, c, _, node, cc, argAp, config) and + fwdFlowConsCand(ap0, c, ap, config) + ) + or + // flow into a callable + exists(ApApprox apa | + fwdFlowIn(_, node, _, cc, _, ap, config) and + apa = getApprox(ap) and + if PrevStage::parameterMayFlowThrough(node, _, apa, config) + then argAp = apSome(ap) + else argAp = apNone() + ) + or + // flow out of a callable + exists(DataFlowCall call | + fwdFlowOut(call, node, any(CcNoCall innercc), cc, argAp, ap, config) + or + exists(Ap argAp0 | + fwdFlowOutFromArg(call, node, argAp0, ap, config) and + fwdFlowIsEntered(call, cc, argAp, argAp0, config) + ) + ) + } + + pragma[nomagic] + private predicate fwdFlowStore( + Node node1, Ap ap1, TypedContent tc, Node node2, Cc cc, ApOption argAp, Configuration config + ) { + exists(DataFlowType contentType | + fwdFlow(node1, cc, argAp, ap1, config) and + PrevStage::storeStepCand(node1, unbindApa(getApprox(ap1)), tc, node2, contentType, config) and + typecheckStore(ap1, contentType) + ) + } + + /** + * Holds if forward flow with access path `tail` reaches a store of `c` + * resulting in access path `cons`. 
+ */ + pragma[nomagic] + private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(TypedContent tc | + fwdFlowStore(_, tail, tc, _, _, _, config) and + tc.getContent() = c and + cons = apCons(tc, tail) + ) + } + + pragma[nomagic] + private predicate fwdFlowRead( + Ap ap, Content c, Node node1, Node node2, Cc cc, ApOption argAp, Configuration config + ) { + fwdFlow(node1, cc, argAp, ap, config) and + PrevStage::readStepCand(node1, c, node2, config) and + getHeadContent(ap) = c + } + + pragma[nomagic] + private predicate fwdFlowIn( + DataFlowCall call, ParamNode p, Cc outercc, Cc innercc, ApOption argAp, Ap ap, + Configuration config + ) { + exists(ArgNode arg, boolean allowsFieldFlow | + fwdFlow(arg, outercc, argAp, ap, config) and + flowIntoCall(call, arg, p, allowsFieldFlow, config) and + innercc = getCallContextCall(call, getNodeEnclosingCallable(p), outercc) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + /** + * Holds if flow may exit from `call` at `out` with access path `ap`. The + * inner call context is `innercc`, but `ccOut` is just the call context + * based on the return step. In the case of through-flow `ccOut` is discarded + * and replaced by the outer call context as tracked by `fwdFlowIsEntered`. 
+ */ + pragma[nomagic] + private predicate fwdFlowOut( + DataFlowCall call, Node out, Cc innercc, Cc ccOut, ApOption argAp, Ap ap, Configuration config + ) { + exists(ReturnNodeExt ret, boolean allowsFieldFlow, DataFlowCallable inner | + fwdFlow(ret, innercc, argAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) and + inner = getNodeEnclosingCallable(ret) and + checkCallContextReturn(innercc, inner, call) and + ccOut = getCallContextReturn(inner, call) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + pragma[nomagic] + private predicate fwdFlowOutFromArg( + DataFlowCall call, Node out, Ap argAp, Ap ap, Configuration config + ) { + fwdFlowOut(call, out, any(CcCall ccc), _, apSome(argAp), ap, config) + } + + /** + * Holds if an argument to `call` is reached in the flow covered by `fwdFlow` + * and data might flow through the target callable and back out at `call`. + */ + pragma[nomagic] + private predicate fwdFlowIsEntered( + DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config + ) { + exists(ParamNode p | + fwdFlowIn(call, p, cc, _, argAp, ap, config) and + PrevStage::parameterMayFlowThrough(p, _, unbindApa(getApprox(ap)), config) + ) + } + + pragma[nomagic] + private predicate storeStepFwd( + Node node1, Ap ap1, TypedContent tc, Node node2, Ap ap2, Configuration config + ) { + fwdFlowStore(node1, ap1, tc, node2, _, _, config) and + ap2 = apCons(tc, ap1) and + fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config) + } + + private predicate readStepFwd(Node n1, Ap ap1, Content c, Node n2, Ap ap2, Configuration config) { + fwdFlowRead(ap1, c, n1, n2, _, _, config) and + fwdFlowConsCand(ap1, c, ap2, config) + } + + /** + * Holds if `node` with access path `ap` is part of a path from a source to a + * sink in the configuration `config`. 
+ * + * The Boolean `toReturn` records whether the node must be returned from the + * enclosing callable in order to reach a sink, and if so, `returnAp` records + * the access path of the returned value. + */ + pragma[nomagic] + predicate revFlow(Node node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) { + revFlow0(node, toReturn, returnAp, ap, config) and + fwdFlow(node, _, _, ap, config) + } + + pragma[nomagic] + private predicate revFlow0( + Node node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + fwdFlow(node, _, _, ap, config) and + config.isSink(node) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + or + exists(Node mid | + localStep(node, mid, true, _, config, _) and + revFlow(mid, toReturn, returnAp, ap, config) + ) + or + exists(Node mid, ApNil nil | + fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and + localStep(node, mid, false, _, config, _) and + revFlow(mid, toReturn, returnAp, nil, pragma[only_bind_into](config)) and + ap instanceof ApNil + ) + or + exists(Node mid | + jumpStep(node, mid, config) and + revFlow(mid, _, _, ap, config) and + toReturn = false and + returnAp = apNone() + ) + or + exists(Node mid, ApNil nil | + fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and + additionalJumpStep(node, mid, config) and + revFlow(pragma[only_bind_into](mid), _, _, nil, pragma[only_bind_into](config)) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + ) + or + // store + exists(Ap ap0, Content c | + revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and + revFlowConsCand(ap0, c, ap, config) + ) + or + // read + exists(Node mid, Ap ap0 | + revFlow(mid, toReturn, returnAp, ap0, config) and + readStepFwd(node, ap, _, mid, ap0, config) + ) + or + // flow into a callable + exists(DataFlowCall call | + revFlowIn(call, node, toReturn, returnAp, ap, config) and + toReturn = false + or + exists(Ap returnAp0 | + 
revFlowInToReturn(call, node, returnAp0, ap, config) and + revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + ) + or + // flow out of a callable + revFlowOut(_, node, _, _, ap, config) and + toReturn = true and + if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config) + then returnAp = apSome(ap) + else returnAp = apNone() + } + + pragma[nomagic] + private predicate revFlowStore( + Ap ap0, Content c, Ap ap, Node node, TypedContent tc, Node mid, boolean toReturn, + ApOption returnAp, Configuration config + ) { + revFlow(mid, toReturn, returnAp, ap0, config) and + storeStepFwd(node, ap, tc, mid, ap0, config) and + tc.getContent() = c + } + + /** + * Holds if reverse flow with access path `tail` reaches a read of `c` + * resulting in access path `cons`. + */ + pragma[nomagic] + private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(Node mid, Ap tail0 | + revFlow(mid, _, _, tail, config) and + tail = pragma[only_bind_into](tail0) and + readStepFwd(_, cons, c, mid, tail0, config) + ) + } + + pragma[nomagic] + private predicate revFlowOut( + DataFlowCall call, ReturnNodeExt ret, boolean toReturn, ApOption returnAp, Ap ap, + Configuration config + ) { + exists(Node out, boolean allowsFieldFlow | + revFlow(out, toReturn, returnAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + pragma[nomagic] + private predicate revFlowIn( + DataFlowCall call, ArgNode arg, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + exists(ParamNode p, boolean allowsFieldFlow | + revFlow(p, toReturn, returnAp, ap, config) and + flowIntoCall(call, arg, p, allowsFieldFlow, config) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + pragma[nomagic] + private predicate revFlowInToReturn( + DataFlowCall call, ArgNode arg, Ap returnAp, Ap ap, Configuration config + ) { + revFlowIn(call, arg, true, apSome(returnAp), ap, 
config) + } + + /** + * Holds if an output from `call` is reached in the flow covered by `revFlow` + * and data might flow through the target callable resulting in reverse flow + * reaching an argument of `call`. + */ + pragma[nomagic] + private predicate revFlowIsReturned( + DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + exists(ReturnNodeExt ret, CcCall ccc | + revFlowOut(call, ret, toReturn, returnAp, ap, config) and + fwdFlow(ret, ccc, apSome(_), ap, config) and + ccc.matchesCall(call) + ) + } + + pragma[nomagic] + predicate storeStepCand( + Node node1, Ap ap1, TypedContent tc, Node node2, DataFlowType contentType, Configuration config + ) { + exists(Ap ap2, Content c | + store(node1, tc, node2, contentType) and + revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and + revFlowConsCand(ap2, c, ap1, config) + ) + } + + predicate readStepCand(Node node1, Content c, Node node2, Configuration config) { + exists(Ap ap1, Ap ap2 | + revFlow(node2, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and + readStepFwd(node1, ap1, c, node2, ap2, config) and + revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _, + pragma[only_bind_into](config)) + ) + } + + predicate revFlow(Node node, Configuration config) { revFlow(node, _, _, _, config) } + + private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) { + storeStepFwd(_, ap, tc, _, _, config) + } + + predicate consCand(TypedContent tc, Ap ap, Configuration config) { + storeStepCand(_, ap, tc, _, _, config) + } + + pragma[noinline] + private predicate parameterFlow( + ParamNode p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config + ) { + revFlow(p, true, apSome(ap0), ap, config) and + c = getNodeEnclosingCallable(p) + } + + predicate parameterMayFlowThrough(ParamNode p, DataFlowCallable c, Ap ap, Configuration config) { + exists(ReturnNodeExt ret, Ap ap0, ReturnKindExt kind, int pos | + parameterFlow(p, ap, ap0, c, 
config) and + c = getNodeEnclosingCallable(ret) and + revFlow(ret, true, apSome(_), ap0, config) and + fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and + kind = ret.getKind() and + p.isParameterOf(_, pos) and + // we don't expect a parameter to return stored in itself + not kind.(ParamUpdateReturnKind).getPosition() = pos + ) + } + + predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) { + fwd = true and + nodes = count(Node node | fwdFlow(node, _, _, _, config)) and + fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and + tuples = count(Node n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config)) + or + fwd = false and + nodes = count(Node node | revFlow(node, _, _, _, config)) and + fields = count(TypedContent f0 | consCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and + tuples = count(Node n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config)) + } + /* End: Stage 3 logic. */ +} + +/** + * Holds if `argApf` is recorded as the summary context for flow reaching `node` + * and remains relevant for the following pruning stage. + */ +private predicate flowCandSummaryCtx(Node node, AccessPathFront argApf, Configuration config) { + exists(AccessPathFront apf | + Stage3::revFlow(node, true, _, apf, config) and + Stage3::fwdFlow(node, true, TAccessPathFrontSome(argApf), apf, config) + ) +} + +/** + * Holds if a length 2 access path approximation with the head `tc` is expected + * to be expensive. 
+ */ +private predicate expensiveLen2unfolding(TypedContent tc, Configuration config) { + exists(int tails, int nodes, int apLimit, int tupleLimit | + tails = strictcount(AccessPathFront apf | Stage3::consCand(tc, apf, config)) and + nodes = + strictcount(Node n | + Stage3::revFlow(n, _, _, any(AccessPathFrontHead apf | apf.getHead() = tc), config) + or + flowCandSummaryCtx(n, any(AccessPathFrontHead apf | apf.getHead() = tc), config) + ) and + accessPathApproxCostLimits(apLimit, tupleLimit) and + apLimit < tails and + tupleLimit < (tails - 1) * nodes + ) +} + +private newtype TAccessPathApprox = + TNil(DataFlowType t) or + TConsNil(TypedContent tc, DataFlowType t) { + Stage3::consCand(tc, TFrontNil(t), _) and + not expensiveLen2unfolding(tc, _) + } or + TConsCons(TypedContent tc1, TypedContent tc2, int len) { + Stage3::consCand(tc1, TFrontHead(tc2), _) and + len in [2 .. accessPathLimit()] and + not expensiveLen2unfolding(tc1, _) + } or + TCons1(TypedContent tc, int len) { + len in [1 .. accessPathLimit()] and + expensiveLen2unfolding(tc, _) + } + +/** + * Conceptually a list of `TypedContent`s followed by a `DataFlowType`, but only + * the first two elements of the list and its length are tracked. If data flows + * from a source to a given node with a given `AccessPathApprox`, this indicates + * the sequence of dereference operations needed to get from the value in the node + * to the tracked object. The final type indicates the type of the tracked object. + * + * For heads `tc` where `expensiveLen2unfolding(tc, _)` holds, only the head and + * the length are tracked (the `TCons1` representation); the `TConsNil` and + * `TConsCons` representations are only used for the remaining heads. + */ +abstract private class AccessPathApprox extends TAccessPathApprox { + abstract string toString(); + + abstract TypedContent getHead(); + + abstract int len(); + + abstract DataFlowType getType(); + + abstract AccessPathFront getFront(); + + /** Gets the access path obtained by popping `head` from this path, if any. 
*/ + abstract AccessPathApprox pop(TypedContent head); +} + +private class AccessPathApproxNil extends AccessPathApprox, TNil { + private DataFlowType t; + + AccessPathApproxNil() { this = TNil(t) } + + override string toString() { result = concat(": " + ppReprType(t)) } + + override TypedContent getHead() { none() } + + override int len() { result = 0 } + + override DataFlowType getType() { result = t } + + override AccessPathFront getFront() { result = TFrontNil(t) } + + override AccessPathApprox pop(TypedContent head) { none() } +} + +abstract private class AccessPathApproxCons extends AccessPathApprox { } + +private class AccessPathApproxConsNil extends AccessPathApproxCons, TConsNil { + private TypedContent tc; + private DataFlowType t; + + AccessPathApproxConsNil() { this = TConsNil(tc, t) } + + override string toString() { + // The `concat` becomes "" if `ppReprType` has no result. + result = "[" + tc.toString() + "]" + concat(" : " + ppReprType(t)) + } + + override TypedContent getHead() { result = tc } + + override int len() { result = 1 } + + override DataFlowType getType() { result = tc.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc) } + + override AccessPathApprox pop(TypedContent head) { head = tc and result = TNil(t) } +} + +private class AccessPathApproxConsCons extends AccessPathApproxCons, TConsCons { + private TypedContent tc1; + private TypedContent tc2; + private int len; + + AccessPathApproxConsCons() { this = TConsCons(tc1, tc2, len) } + + override string toString() { + if len = 2 + then result = "[" + tc1.toString() + ", " + tc2.toString() + "]" + else result = "[" + tc1.toString() + ", " + tc2.toString() + ", ... 
(" + len.toString() + ")]" + } + + override TypedContent getHead() { result = tc1 } + + override int len() { result = len } + + override DataFlowType getType() { result = tc1.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc1) } + + override AccessPathApprox pop(TypedContent head) { + head = tc1 and + ( + result = TConsCons(tc2, _, len - 1) + or + len = 2 and + result = TConsNil(tc2, _) + or + result = TCons1(tc2, len - 1) + ) + } +} + +private class AccessPathApproxCons1 extends AccessPathApproxCons, TCons1 { + private TypedContent tc; + private int len; + + AccessPathApproxCons1() { this = TCons1(tc, len) } + + override string toString() { + if len = 1 + then result = "[" + tc.toString() + "]" + else result = "[" + tc.toString() + ", ... (" + len.toString() + ")]" + } + + override TypedContent getHead() { result = tc } + + override int len() { result = len } + + override DataFlowType getType() { result = tc.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc) } + + override AccessPathApprox pop(TypedContent head) { + head = tc and + ( + exists(TypedContent tc2 | Stage3::consCand(tc, TFrontHead(tc2), _) | + result = TConsCons(tc2, _, len - 1) + or + len = 2 and + result = TConsNil(tc2, _) + or + result = TCons1(tc2, len - 1) + ) + or + exists(DataFlowType t | + len = 1 and + Stage3::consCand(tc, TFrontNil(t), _) and + result = TNil(t) + ) + ) + } +} + +/** Gets the access path obtained by popping `tc` from `apa`, if any. */ +private AccessPathApprox pop(TypedContent tc, AccessPathApprox apa) { result = apa.pop(tc) } + +/** Gets the access path obtained by pushing `tc` onto `apa`. 
*/ +private AccessPathApprox push(TypedContent tc, AccessPathApprox apa) { apa = pop(tc, result) } + +private newtype TAccessPathApproxOption = + TAccessPathApproxNone() or + TAccessPathApproxSome(AccessPathApprox apa) + +private class AccessPathApproxOption extends TAccessPathApproxOption { + string toString() { + this = TAccessPathApproxNone() and result = "" + or + this = TAccessPathApproxSome(any(AccessPathApprox apa | result = apa.toString())) + } +} + +private module Stage4 { + module PrevStage = Stage3; + + class ApApprox = PrevStage::Ap; + + class Ap = AccessPathApprox; + + class ApNil = AccessPathApproxNil; + + private ApApprox getApprox(Ap ap) { result = ap.getFront() } + + private ApNil getApNil(Node node) { + PrevStage::revFlow(node, _) and result = TNil(getNodeDataFlowType(node)) + } + + bindingset[tc, tail] + private Ap apCons(TypedContent tc, Ap tail) { result = push(tc, tail) } + + pragma[noinline] + private Content getHeadContent(Ap ap) { result = ap.getHead().getContent() } + + class ApOption = AccessPathApproxOption; + + ApOption apNone() { result = TAccessPathApproxNone() } + + ApOption apSome(Ap ap) { result = TAccessPathApproxSome(ap) } + + class Cc = CallContext; + + class CcCall = CallContextCall; + + class CcNoCall = CallContextNoCall; + + Cc ccNone() { result instanceof CallContextAny } + + private class LocalCc = LocalCallContext; + + bindingset[call, c, outercc] + private CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { + c = resolveCall(call, outercc) and + if recordDataFlowCallSite(call, c) then result = TSpecificCall(call) else result = TSomeCall() + } + + bindingset[call, c] + private CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call) { + if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone() + } + + bindingset[innercc, inner, call] + private predicate checkCallContextReturn(Cc innercc, DataFlowCallable inner, DataFlowCall call) { + 
resolveReturn(innercc, inner, call) + or + innercc.(CallContextCall).matchesCall(call) + } + + bindingset[node, cc, config] + private LocalCc getLocalCc(Node node, Cc cc, Configuration config) { + localFlowEntry(node, config) and + result = getLocalCallContext(pragma[only_bind_out](cc), getNodeEnclosingCallable(node)) + } + + private predicate localStep( + Node node1, Node node2, boolean preservesValue, ApNil ap, Configuration config, LocalCc lcc + ) { + localFlowBigStep(node1, node2, preservesValue, ap.getFront(), config, lcc) + } + + pragma[nomagic] + private predicate flowOutOfCall( + DataFlowCall call, ReturnNodeExt node1, Node node2, boolean allowsFieldFlow, + Configuration config + ) { + flowOutOfCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and + PrevStage::revFlow(node2, _, _, _, pragma[only_bind_into](config)) and + PrevStage::revFlow(node1, _, _, _, pragma[only_bind_into](config)) + } + + pragma[nomagic] + private predicate flowIntoCall( + DataFlowCall call, ArgNode node1, ParamNode node2, boolean allowsFieldFlow, Configuration config + ) { + flowIntoCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and + PrevStage::revFlow(node2, _, _, _, pragma[only_bind_into](config)) and + PrevStage::revFlow(node1, _, _, _, pragma[only_bind_into](config)) + } + + bindingset[node, ap] + private predicate filter(Node node, Ap ap) { any() } + + // Type checking is not necessary here as it has already been done in stage 3. + bindingset[ap, contentType] + private predicate typecheckStore(Ap ap, DataFlowType contentType) { any() } + + /* Begin: Stage 4 logic. 
*/ + private predicate flowCand(Node node, ApApprox apa, Configuration config) { + PrevStage::revFlow(node, _, _, apa, config) + } + + bindingset[result, apa] + private ApApprox unbindApa(ApApprox apa) { + exists(ApApprox apa0 | + apa = pragma[only_bind_into](apa0) and result = pragma[only_bind_into](apa0) + ) + } + + /** + * Holds if `node` is reachable with access path `ap` from a source in the + * configuration `config`. + * + * The call context `cc` records whether the node is reached through an + * argument in a call, and if so, `argAp` records the access path of that + * argument. + */ + pragma[nomagic] + predicate fwdFlow(Node node, Cc cc, ApOption argAp, Ap ap, Configuration config) { + fwdFlow0(node, cc, argAp, ap, config) and + flowCand(node, unbindApa(getApprox(ap)), config) and + filter(node, ap) + } + + pragma[nomagic] + private predicate fwdFlow0(Node node, Cc cc, ApOption argAp, Ap ap, Configuration config) { + flowCand(node, _, config) and + config.isSource(node) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + or + exists(Node mid, Ap ap0, LocalCc localCc | + fwdFlow(mid, cc, argAp, ap0, config) and + localCc = getLocalCc(mid, cc, config) + | + localStep(mid, node, true, _, config, localCc) and + ap = ap0 + or + localStep(mid, node, false, ap, config, localCc) and + ap0 instanceof ApNil + ) + or + exists(Node mid | + fwdFlow(mid, _, _, ap, pragma[only_bind_into](config)) and + flowCand(node, _, pragma[only_bind_into](config)) and + jumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() + ) + or + exists(Node mid, ApNil nil | + fwdFlow(mid, _, _, nil, pragma[only_bind_into](config)) and + flowCand(node, _, pragma[only_bind_into](config)) and + additionalJumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + ) + or + // store + exists(TypedContent tc, Ap ap0 | + fwdFlowStore(_, ap0, tc, node, cc, argAp, config) and + ap = apCons(tc, ap0) + ) + or + // read + exists(Ap 
ap0, Content c | + fwdFlowRead(ap0, c, _, node, cc, argAp, config) and + fwdFlowConsCand(ap0, c, ap, config) + ) + or + // flow into a callable + exists(ApApprox apa | + fwdFlowIn(_, node, _, cc, _, ap, config) and + apa = getApprox(ap) and + if PrevStage::parameterMayFlowThrough(node, _, apa, config) + then argAp = apSome(ap) + else argAp = apNone() + ) + or + // flow out of a callable + exists(DataFlowCall call | + fwdFlowOut(call, node, any(CcNoCall innercc), cc, argAp, ap, config) + or + exists(Ap argAp0 | + fwdFlowOutFromArg(call, node, argAp0, ap, config) and + fwdFlowIsEntered(call, cc, argAp, argAp0, config) + ) + ) + } + + pragma[nomagic] + private predicate fwdFlowStore( + Node node1, Ap ap1, TypedContent tc, Node node2, Cc cc, ApOption argAp, Configuration config + ) { + exists(DataFlowType contentType | + fwdFlow(node1, cc, argAp, ap1, config) and + PrevStage::storeStepCand(node1, unbindApa(getApprox(ap1)), tc, node2, contentType, config) and + typecheckStore(ap1, contentType) + ) + } + + /** + * Holds if forward flow with access path `tail` reaches a store of `c` + * resulting in access path `cons`. 
+   */
+  pragma[nomagic]
+  private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) {
+    exists(TypedContent tc |
+      fwdFlowStore(_, tail, tc, _, _, _, config) and
+      tc.getContent() = c and
+      cons = apCons(tc, tail)
+    )
+  }
+  /** Holds if forward flow reaches `node1` with `ap`, the head content of `ap` is `c`, and `c` may be read from `node1` to `node2`. */
+  pragma[nomagic]
+  private predicate fwdFlowRead(
+    Ap ap, Content c, Node node1, Node node2, Cc cc, ApOption argAp, Configuration config
+  ) {
+    fwdFlow(node1, cc, argAp, ap, config) and
+    PrevStage::readStepCand(node1, c, node2, config) and // read-step candidate computed by the previous stage
+    getHeadContent(ap) = c
+  }
+  /** Holds if forward flow reaches an argument of `call` with `ap` in `outercc`, and may enter parameter `p` in `innercc`. */
+  pragma[nomagic]
+  private predicate fwdFlowIn(
+    DataFlowCall call, ParamNode p, Cc outercc, Cc innercc, ApOption argAp, Ap ap,
+    Configuration config
+  ) {
+    exists(ArgNode arg, boolean allowsFieldFlow |
+      fwdFlow(arg, outercc, argAp, ap, config) and
+      flowIntoCall(call, arg, p, allowsFieldFlow, config) and
+      innercc = getCallContextCall(call, getNodeEnclosingCallable(p), outercc)
+    |
+      ap instanceof ApNil or allowsFieldFlow = true // a non-nil access path is only admitted when `allowsFieldFlow` holds
+    )
+  }
+
+  /**
+   * Holds if flow may exit from `call` at `out` with access path `ap`. The
+   * inner call context is `innercc`, but `ccOut` is just the call context
+   * based on the return step. In the case of through-flow `ccOut` is discarded
+   * and replaced by the outer call context as tracked by `fwdFlowIsEntered`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowOut(
+    DataFlowCall call, Node out, Cc innercc, Cc ccOut, ApOption argAp, Ap ap, Configuration config
+  ) {
+    exists(ReturnNodeExt ret, boolean allowsFieldFlow, DataFlowCallable inner |
+      fwdFlow(ret, innercc, argAp, ap, config) and
+      flowOutOfCall(call, ret, out, allowsFieldFlow, config) and
+      inner = getNodeEnclosingCallable(ret) and
+      checkCallContextReturn(innercc, inner, call) and
+      ccOut = getCallContextReturn(inner, call)
+    |
+      ap instanceof ApNil or allowsFieldFlow = true // a non-nil access path is only admitted when `allowsFieldFlow` holds
+    )
+  }
+  /** Holds if `fwdFlowOut` holds for `call`, `out` and `ap` with a `CcCall` inner context whose recorded argument access path is `argAp`. */
+  pragma[nomagic]
+  private predicate fwdFlowOutFromArg(
+    DataFlowCall call, Node out, Ap argAp, Ap ap, Configuration config
+  ) {
+    fwdFlowOut(call, out, any(CcCall ccc), _, apSome(argAp), ap, config)
+  }
+
+  /**
+   * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`
+   * and data might flow through the target callable and back out at `call`.
+   */
+  pragma[nomagic]
+  private predicate fwdFlowIsEntered(
+    DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config
+  ) {
+    exists(ParamNode p |
+      fwdFlowIn(call, p, cc, _, argAp, ap, config) and
+      PrevStage::parameterMayFlowThrough(p, _, unbindApa(getApprox(ap)), config)
+    )
+  }
+  /** Holds if forward flow stores `tc` from `node1` (with `ap1`) into `node2`, giving `ap2`, and some forward-flow read applies to `ap2` and the content of `tc`. */
+  pragma[nomagic]
+  private predicate storeStepFwd(
+    Node node1, Ap ap1, TypedContent tc, Node node2, Ap ap2, Configuration config
+  ) {
+    fwdFlowStore(node1, ap1, tc, node2, _, _, config) and
+    ap2 = apCons(tc, ap1) and
+    fwdFlowRead(ap2, tc.getContent(), _, _, _, _, config)
+  }
+  /** Holds if forward flow reads `c` from `n1` (with `ap1`) into `n2`, and `fwdFlowConsCand(ap1, c, ap2, config)` relates `ap1` to the tail `ap2`. */
+  private predicate readStepFwd(Node n1, Ap ap1, Content c, Node n2, Ap ap2, Configuration config) {
+    fwdFlowRead(ap1, c, n1, n2, _, _, config) and
+    fwdFlowConsCand(ap1, c, ap2, config)
+  }
+
+  /**
+   * Holds if `node` with access path `ap` is part of a path from a source to a
+   * sink in the configuration `config`.
+ * + * The Boolean `toReturn` records whether the node must be returned from the + * enclosing callable in order to reach a sink, and if so, `returnAp` records + * the access path of the returned value. + */ + pragma[nomagic] + predicate revFlow(Node node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config) { + revFlow0(node, toReturn, returnAp, ap, config) and + fwdFlow(node, _, _, ap, config) + } + + pragma[nomagic] + private predicate revFlow0( + Node node, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + fwdFlow(node, _, _, ap, config) and + config.isSink(node) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + or + exists(Node mid | + localStep(node, mid, true, _, config, _) and + revFlow(mid, toReturn, returnAp, ap, config) + ) + or + exists(Node mid, ApNil nil | + fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and + localStep(node, mid, false, _, config, _) and + revFlow(mid, toReturn, returnAp, nil, pragma[only_bind_into](config)) and + ap instanceof ApNil + ) + or + exists(Node mid | + jumpStep(node, mid, config) and + revFlow(mid, _, _, ap, config) and + toReturn = false and + returnAp = apNone() + ) + or + exists(Node mid, ApNil nil | + fwdFlow(node, _, _, ap, pragma[only_bind_into](config)) and + additionalJumpStep(node, mid, config) and + revFlow(pragma[only_bind_into](mid), _, _, nil, pragma[only_bind_into](config)) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + ) + or + // store + exists(Ap ap0, Content c | + revFlowStore(ap0, c, ap, node, _, _, toReturn, returnAp, config) and + revFlowConsCand(ap0, c, ap, config) + ) + or + // read + exists(Node mid, Ap ap0 | + revFlow(mid, toReturn, returnAp, ap0, config) and + readStepFwd(node, ap, _, mid, ap0, config) + ) + or + // flow into a callable + exists(DataFlowCall call | + revFlowIn(call, node, toReturn, returnAp, ap, config) and + toReturn = false + or + exists(Ap returnAp0 | + 
revFlowInToReturn(call, node, returnAp0, ap, config) and + revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + ) + or + // flow out of a callable + revFlowOut(_, node, _, _, ap, config) and + toReturn = true and + if fwdFlow(node, any(CcCall ccc), apSome(_), ap, config) + then returnAp = apSome(ap) + else returnAp = apNone() + } + + pragma[nomagic] + private predicate revFlowStore( + Ap ap0, Content c, Ap ap, Node node, TypedContent tc, Node mid, boolean toReturn, + ApOption returnAp, Configuration config + ) { + revFlow(mid, toReturn, returnAp, ap0, config) and + storeStepFwd(node, ap, tc, mid, ap0, config) and + tc.getContent() = c + } + + /** + * Holds if reverse flow with access path `tail` reaches a read of `c` + * resulting in access path `cons`. + */ + pragma[nomagic] + private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(Node mid, Ap tail0 | + revFlow(mid, _, _, tail, config) and + tail = pragma[only_bind_into](tail0) and + readStepFwd(_, cons, c, mid, tail0, config) + ) + } + + pragma[nomagic] + private predicate revFlowOut( + DataFlowCall call, ReturnNodeExt ret, boolean toReturn, ApOption returnAp, Ap ap, + Configuration config + ) { + exists(Node out, boolean allowsFieldFlow | + revFlow(out, toReturn, returnAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + pragma[nomagic] + private predicate revFlowIn( + DataFlowCall call, ArgNode arg, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + exists(ParamNode p, boolean allowsFieldFlow | + revFlow(p, toReturn, returnAp, ap, config) and + flowIntoCall(call, arg, p, allowsFieldFlow, config) + | + ap instanceof ApNil or allowsFieldFlow = true + ) + } + + pragma[nomagic] + private predicate revFlowInToReturn( + DataFlowCall call, ArgNode arg, Ap returnAp, Ap ap, Configuration config + ) { + revFlowIn(call, arg, true, apSome(returnAp), ap, 
config) + } + + /** + * Holds if an output from `call` is reached in the flow covered by `revFlow` + * and data might flow through the target callable resulting in reverse flow + * reaching an argument of `call`. + */ + pragma[nomagic] + private predicate revFlowIsReturned( + DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + exists(ReturnNodeExt ret, CcCall ccc | + revFlowOut(call, ret, toReturn, returnAp, ap, config) and + fwdFlow(ret, ccc, apSome(_), ap, config) and + ccc.matchesCall(call) + ) + } + + pragma[nomagic] + predicate storeStepCand( + Node node1, Ap ap1, TypedContent tc, Node node2, DataFlowType contentType, Configuration config + ) { + exists(Ap ap2, Content c | + store(node1, tc, node2, contentType) and + revFlowStore(ap2, c, ap1, node1, tc, node2, _, _, config) and + revFlowConsCand(ap2, c, ap1, config) + ) + } + + predicate readStepCand(Node node1, Content c, Node node2, Configuration config) { + exists(Ap ap1, Ap ap2 | + revFlow(node2, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and + readStepFwd(node1, ap1, c, node2, ap2, config) and + revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _, + pragma[only_bind_into](config)) + ) + } + + predicate revFlow(Node node, Configuration config) { revFlow(node, _, _, _, config) } + + private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) { + storeStepFwd(_, ap, tc, _, _, config) + } + + predicate consCand(TypedContent tc, Ap ap, Configuration config) { + storeStepCand(_, ap, tc, _, _, config) + } + + pragma[noinline] + private predicate parameterFlow( + ParamNode p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config + ) { + revFlow(p, true, apSome(ap0), ap, config) and + c = getNodeEnclosingCallable(p) + } + + predicate parameterMayFlowThrough(ParamNode p, DataFlowCallable c, Ap ap, Configuration config) { + exists(ReturnNodeExt ret, Ap ap0, ReturnKindExt kind, int pos | + parameterFlow(p, ap, ap0, c, 
config) and + c = getNodeEnclosingCallable(ret) and + revFlow(ret, true, apSome(_), ap0, config) and + fwdFlow(ret, any(CcCall ccc), apSome(ap), ap0, config) and + kind = ret.getKind() and + p.isParameterOf(_, pos) and + // we don't expect a parameter to return stored in itself + not kind.(ParamUpdateReturnKind).getPosition() = pos + ) + } + + predicate stats(boolean fwd, int nodes, int fields, int conscand, int tuples, Configuration config) { + fwd = true and + nodes = count(Node node | fwdFlow(node, _, _, _, config)) and + fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and + tuples = count(Node n, Cc cc, ApOption argAp, Ap ap | fwdFlow(n, cc, argAp, ap, config)) + or + fwd = false and + nodes = count(Node node | revFlow(node, _, _, _, config)) and + fields = count(TypedContent f0 | consCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and + tuples = count(Node n, boolean b, ApOption retAp, Ap ap | revFlow(n, b, retAp, ap, config)) + } + /* End: Stage 4 logic. */ +} + +bindingset[conf, result] +private Configuration unbindConf(Configuration conf) { + exists(Configuration c | result = pragma[only_bind_into](c) and conf = pragma[only_bind_into](c)) +} + +private predicate nodeMayUseSummary(Node n, AccessPathApprox apa, Configuration config) { + exists(DataFlowCallable c, AccessPathApprox apa0 | + Stage4::parameterMayFlowThrough(_, c, apa, _) and + Stage4::revFlow(n, true, _, apa0, config) and + Stage4::fwdFlow(n, any(CallContextCall ccc), TAccessPathApproxSome(apa), apa0, config) and + getNodeEnclosingCallable(n) = c + ) +} + +private newtype TSummaryCtx = + TSummaryCtxNone() or + TSummaryCtxSome(ParamNode p, AccessPath ap) { + Stage4::parameterMayFlowThrough(p, _, ap.getApprox(), _) + } + +/** + * A context for generating flow summaries. 
This represents flow entry through
+ * a specific parameter with an access path of a specific shape.
+ *
+ * Summaries are only created for parameters that may flow through.
+ */
+abstract private class SummaryCtx extends TSummaryCtx {
+  /** Gets a textual representation of this summary context. */ abstract string toString();
+}
+
+/** A summary context from which no flow summary can be generated. */
+private class SummaryCtxNone extends SummaryCtx, TSummaryCtxNone {
+  override string toString() { result = "" }
+}
+
+/** A summary context from which a flow summary can be generated. */
+private class SummaryCtxSome extends SummaryCtx, TSummaryCtxSome {
+  private ParamNode p; // the parameter component of `TSummaryCtxSome(p, ap)`
+  private AccessPath ap; // the access path component of `TSummaryCtxSome(p, ap)`
+
+  SummaryCtxSome() { this = TSummaryCtxSome(p, ap) }
+  /** Gets the position of the parameter of this summary context. */
+  int getParameterPos() { p.isParameterOf(_, result) }
+  /** Gets a textual representation made of the parameter followed by the access path. */
+  override string toString() { result = p + ": " + ap }
+  /** Holds if the parameter of this summary context is at the specified location. */
+  predicate hasLocationInfo(
+    string filepath, int startline, int startcolumn, int endline, int endcolumn
+  ) {
+    p.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
+  }
+}
+
+/**
+ * Gets the number of length 2 access path approximations that correspond to `apa`.
+ */
+private int count1to2unfold(AccessPathApproxCons1 apa, Configuration config) {
+  exists(TypedContent tc, int len |
+    tc = apa.getHead() and
+    len = apa.len() and
+    result =
+      strictcount(AccessPathFront apf |
+        Stage4::consCand(tc, any(AccessPathApprox ap | ap.getFront() = apf and ap.len() = len - 1),
+          config)
+      )
+  )
+}
+/** Gets the number of nodes that are in `Stage4::revFlow` with `apa` or for which `nodeMayUseSummary` holds; has no result if there are none. */
+private int countNodesUsingAccessPath(AccessPathApprox apa, Configuration config) {
+  result =
+    strictcount(Node n | Stage4::revFlow(n, _, _, apa, config) or nodeMayUseSummary(n, apa, config))
+}
+
+/**
+ * Holds if a length 2 access path approximation matching `apa` is expected
+ * to be expensive.
+ */
+private predicate expensiveLen1to2unfolding(AccessPathApproxCons1 apa, Configuration config) {
+  exists(int aps, int nodes, int apLimit, int tupleLimit |
+    aps = count1to2unfold(apa, config) and
+    nodes = countNodesUsingAccessPath(apa, config) and
+    accessPathCostLimits(apLimit, tupleLimit) and
+    apLimit < aps and // both limits must be exceeded for the unfolding to count as expensive
+    tupleLimit < (aps - 1) * nodes
+  )
+}
+/** Gets a tail of `apa`: the result of popping some head off `apa` such that head and tail form a `Stage4::consCand`. */
+private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) {
+  exists(TypedContent head |
+    apa.pop(head) = result and
+    Stage4::consCand(head, result, config)
+  )
+}
+
+/**
+ * Holds with `unfold = false` if a precise head-tail representation of `apa` is
+ * expected to be expensive. Holds with `unfold = true` otherwise.
+ */
+private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) {
+  exists(int aps, int nodes, int apLimit, int tupleLimit |
+    aps = countPotentialAps(apa, config) and
+    nodes = countNodesUsingAccessPath(apa, config) and
+    accessPathCostLimits(apLimit, tupleLimit) and
+    if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true
+  )
+}
+
+/**
+ * Gets the number of `AccessPath`s that correspond to `apa`.
+ */
+private int countAps(AccessPathApprox apa, Configuration config) {
+  evalUnfold(apa, false, config) and // case 1: not unfolded and not expanded to length 2, counted as one
+  result = 1 and
+  (not apa instanceof AccessPathApproxCons1 or expensiveLen1to2unfolding(apa, config))
+  or
+  evalUnfold(apa, false, config) and // case 2: not unfolded, but the length 1 to 2 expansion is not expensive
+  result = count1to2unfold(apa, config) and
+  not expensiveLen1to2unfolding(apa, config)
+  or
+  evalUnfold(apa, true, config) and // case 3: unfolded to a precise head-tail representation
+  result = countPotentialAps(apa, config)
+}
+
+/**
+ * Gets the number of `AccessPath`s that would correspond to `apa` assuming
+ * that it is expanded to a precise head-tail representation.
+ */ +language[monotonicAggregates] +private int countPotentialAps(AccessPathApprox apa, Configuration config) { + apa instanceof AccessPathApproxNil and result = 1 + or + result = strictsum(AccessPathApprox tail | tail = getATail(apa, config) | countAps(tail, config)) +} + +private newtype TAccessPath = + TAccessPathNil(DataFlowType t) or + TAccessPathCons(TypedContent head, AccessPath tail) { + exists(AccessPathApproxCons apa | + not evalUnfold(apa, false, _) and + head = apa.getHead() and + tail.getApprox() = getATail(apa, _) + ) + } or + TAccessPathCons2(TypedContent head1, TypedContent head2, int len) { + exists(AccessPathApproxCons apa | + evalUnfold(apa, false, _) and + not expensiveLen1to2unfolding(apa, _) and + apa.len() = len and + head1 = apa.getHead() and + head2 = getATail(apa, _).getHead() + ) + } or + TAccessPathCons1(TypedContent head, int len) { + exists(AccessPathApproxCons apa | + evalUnfold(apa, false, _) and + expensiveLen1to2unfolding(apa, _) and + apa.len() = len and + head = apa.getHead() + ) + } + +private newtype TPathNode = + TPathNodeMid(Node node, CallContext cc, SummaryCtx sc, AccessPath ap, Configuration config) { + // A PathNode is introduced by a source ... + Stage4::revFlow(node, config) and + config.isSource(node) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + ap = TAccessPathNil(getNodeDataFlowType(node)) + or + // ... or a step from an existing PathNode to another node. + exists(PathNodeMid mid | + pathStep(mid, node, cc, sc, ap) and + pragma[only_bind_into](config) = mid.getConfiguration() and + Stage4::revFlow(node, _, _, ap.getApprox(), pragma[only_bind_into](config)) + ) + } or + TPathNodeSink(Node node, Configuration config) { + pragma[only_bind_into](config).isSink(node) and + Stage4::revFlow(node, pragma[only_bind_into](config)) and + ( + // A sink that is also a source ... + config.isSource(node) + or + // ... 
or a sink that can be reached from a source + exists(PathNodeMid mid | + pathStep(mid, node, _, _, TAccessPathNil(_)) and + pragma[only_bind_into](config) = mid.getConfiguration() + ) + ) + } + +/** + * A list of `TypedContent`s followed by a `DataFlowType`. If data flows from a + * source to a given node with a given `AccessPath`, this indicates the sequence + * of dereference operations needed to get from the value in the node to the + * tracked object. The final type indicates the type of the tracked object. + */ +abstract private class AccessPath extends TAccessPath { + /** Gets the head of this access path, if any. */ + abstract TypedContent getHead(); + + /** Gets the tail of this access path, if any. */ + abstract AccessPath getTail(); + + /** Gets the front of this access path. */ + abstract AccessPathFront getFront(); + + /** Gets the approximation of this access path. */ + abstract AccessPathApprox getApprox(); + + /** Gets the length of this access path. */ + abstract int length(); + + /** Gets a textual representation of this access path. */ + abstract string toString(); + + /** Gets the access path obtained by popping `tc` from this access path, if any. */ + final AccessPath pop(TypedContent tc) { + result = this.getTail() and + tc = this.getHead() + } + + /** Gets the access path obtained by pushing `tc` onto this access path. 
*/ + final AccessPath push(TypedContent tc) { this = result.pop(tc) } +} + +private class AccessPathNil extends AccessPath, TAccessPathNil { + private DataFlowType t; + + AccessPathNil() { this = TAccessPathNil(t) } + + DataFlowType getType() { result = t } + + override TypedContent getHead() { none() } + + override AccessPath getTail() { none() } + + override AccessPathFrontNil getFront() { result = TFrontNil(t) } + + override AccessPathApproxNil getApprox() { result = TNil(t) } + + override int length() { result = 0 } + + override string toString() { result = concat(": " + ppReprType(t)) } +} + +private class AccessPathCons extends AccessPath, TAccessPathCons { + private TypedContent head; + private AccessPath tail; + + AccessPathCons() { this = TAccessPathCons(head, tail) } + + override TypedContent getHead() { result = head } + + override AccessPath getTail() { result = tail } + + override AccessPathFrontHead getFront() { result = TFrontHead(head) } + + override AccessPathApproxCons getApprox() { + result = TConsNil(head, tail.(AccessPathNil).getType()) + or + result = TConsCons(head, tail.getHead(), this.length()) + or + result = TCons1(head, this.length()) + } + + override int length() { result = 1 + tail.length() } + + private string toStringImpl(boolean needsSuffix) { + exists(DataFlowType t | + tail = TAccessPathNil(t) and + needsSuffix = false and + result = head.toString() + "]" + concat(" : " + ppReprType(t)) + ) + or + result = head + ", " + tail.(AccessPathCons).toStringImpl(needsSuffix) + or + exists(TypedContent tc2, TypedContent tc3, int len | tail = TAccessPathCons2(tc2, tc3, len) | + result = head + ", " + tc2 + ", " + tc3 + ", ... (" and len > 2 and needsSuffix = true + or + result = head + ", " + tc2 + ", " + tc3 + "]" and len = 2 and needsSuffix = false + ) + or + exists(TypedContent tc2, int len | tail = TAccessPathCons1(tc2, len) | + result = head + ", " + tc2 + ", ... 
(" and len > 1 and needsSuffix = true + or + result = head + ", " + tc2 + "]" and len = 1 and needsSuffix = false + ) + } + + override string toString() { + result = "[" + this.toStringImpl(true) + length().toString() + ")]" + or + result = "[" + this.toStringImpl(false) + } +} + +private class AccessPathCons2 extends AccessPath, TAccessPathCons2 { + private TypedContent head1; + private TypedContent head2; + private int len; + + AccessPathCons2() { this = TAccessPathCons2(head1, head2, len) } + + override TypedContent getHead() { result = head1 } + + override AccessPath getTail() { + Stage4::consCand(head1, result.getApprox(), _) and + result.getHead() = head2 and + result.length() = len - 1 + } + + override AccessPathFrontHead getFront() { result = TFrontHead(head1) } + + override AccessPathApproxCons getApprox() { + result = TConsCons(head1, head2, len) or + result = TCons1(head1, len) + } + + override int length() { result = len } + + override string toString() { + if len = 2 + then result = "[" + head1.toString() + ", " + head2.toString() + "]" + else + result = "[" + head1.toString() + ", " + head2.toString() + ", ... (" + len.toString() + ")]" + } +} + +private class AccessPathCons1 extends AccessPath, TAccessPathCons1 { + private TypedContent head; + private int len; + + AccessPathCons1() { this = TAccessPathCons1(head, len) } + + override TypedContent getHead() { result = head } + + override AccessPath getTail() { + Stage4::consCand(head, result.getApprox(), _) and result.length() = len - 1 + } + + override AccessPathFrontHead getFront() { result = TFrontHead(head) } + + override AccessPathApproxCons getApprox() { result = TCons1(head, len) } + + override int length() { result = len } + + override string toString() { + if len = 1 + then result = "[" + head.toString() + "]" + else result = "[" + head.toString() + ", ... 
(" + len.toString() + ")]" + } +} + +/** + * A `Node` augmented with a call context (except for sinks), an access path, and a configuration. + * Only those `PathNode`s that are reachable from a source are generated. + */ +class PathNode extends TPathNode { + /** Gets a textual representation of this element. */ + string toString() { none() } + + /** + * Gets a textual representation of this element, including a textual + * representation of the call context. + */ + string toStringWithContext() { none() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + none() + } + + /** Gets the underlying `Node`. */ + Node getNode() { none() } + + /** Gets the associated configuration. */ + Configuration getConfiguration() { none() } + + private predicate isHidden() { + hiddenNode(this.getNode()) and + not this.isSource() and + not this instanceof PathNodeSink + } + + private PathNode getASuccessorIfHidden() { + this.isHidden() and + result = this.(PathNodeImpl).getASuccessorImpl() + } + + /** Gets a successor of this node, if any. */ + final PathNode getASuccessor() { + result = this.(PathNodeImpl).getASuccessorImpl().getASuccessorIfHidden*() and + not this.isHidden() and + not result.isHidden() + } + + /** Holds if this node is a source. 
*/ + predicate isSource() { none() } +} + +abstract private class PathNodeImpl extends PathNode { + abstract PathNode getASuccessorImpl(); + + private string ppAp() { + this instanceof PathNodeSink and result = "" + or + exists(string s | s = this.(PathNodeMid).getAp().toString() | + if s = "" then result = "" else result = " " + s + ) + } + + private string ppCtx() { + this instanceof PathNodeSink and result = "" + or + result = " <" + this.(PathNodeMid).getCallContext().toString() + ">" + } + + override string toString() { result = this.getNode().toString() + ppAp() } + + override string toStringWithContext() { result = this.getNode().toString() + ppAp() + ppCtx() } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.getNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** Holds if `n` can reach a sink. */ +private predicate reach(PathNode n) { n instanceof PathNodeSink or reach(n.getASuccessor()) } + +/** Holds if `n1.getSucc() = n2` and `n2` can reach a sink. */ +private predicate pathSucc(PathNode n1, PathNode n2) { n1.getASuccessor() = n2 and reach(n2) } + +private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2) + +/** + * Provides the query predicates needed to include a graph in a path-problem query. + */ +module PathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */ + query predicate edges(PathNode a, PathNode b) { pathSucc(a, b) } + + /** Holds if `n` is a node in the graph of data flow path explanations. */ + query predicate nodes(PathNode n, string key, string val) { + reach(n) and key = "semmle.label" and val = n.toString() + } +} + +/** + * An intermediate flow graph node. This is a triple consisting of a `Node`, + * a `CallContext`, and a `Configuration`. 
+ */
+private class PathNodeMid extends PathNodeImpl, TPathNodeMid {
+  Node node;
+  CallContext cc;
+  SummaryCtx sc; // carried in addition to the node, call context and configuration
+  AccessPath ap; // likewise part of every `TPathNodeMid` tuple
+  Configuration config;
+
+  PathNodeMid() { this = TPathNodeMid(node, cc, sc, ap, config) }
+
+  override Node getNode() { result = node }
+  /** Gets the call context of this node. */
+  CallContext getCallContext() { result = cc }
+  /** Gets the summary context of this node. */
+  SummaryCtx getSummaryCtx() { result = sc }
+  /** Gets the access path of this node. */
+  AccessPath getAp() { result = ap }
+
+  override Configuration getConfiguration() { result = config }
+  /** Gets an intermediate node that is one `pathStep` away from this node and has the same configuration. */
+  private PathNodeMid getSuccMid() {
+    pathStep(this, result.getNode(), result.getCallContext(), result.getSummaryCtx(), result.getAp()) and
+    result.getConfiguration() = unbindConf(this.getConfiguration())
+  }
+
+  override PathNodeImpl getASuccessorImpl() {
+    // an intermediate step to another intermediate node
+    result = getSuccMid()
+    or
+    // a final step to a sink via zero steps means we merge the last two steps to prevent trivial-looking edges
+    exists(PathNodeMid mid, PathNodeSink sink |
+      mid = getSuccMid() and
+      mid.getNode() = sink.getNode() and
+      mid.getAp() instanceof AccessPathNil and
+      sink.getConfiguration() = unbindConf(mid.getConfiguration()) and
+      result = sink
+    )
+  }
+  /** Holds if this node is a source in `CallContextAny`, with `SummaryCtxNone` and a nil access path. */
+  override predicate isSource() {
+    config.isSource(node) and
+    cc instanceof CallContextAny and
+    sc instanceof SummaryCtxNone and
+    ap instanceof AccessPathNil
+  }
+}
+
+/**
+ * A flow graph node corresponding to a sink. This is disjoint from the
+ * intermediate nodes in order to uniquely correspond to a given sink by
+ * excluding the `CallContext`.
+ */
+private class PathNodeSink extends PathNodeImpl, TPathNodeSink {
+  Node node;
+  Configuration config;
+
+  PathNodeSink() { this = TPathNodeSink(node, config) }
+
+  override Node getNode() { result = node }
+
+  override Configuration getConfiguration() { result = config }
+
+  override PathNode getASuccessorImpl() { none() } // a sink node has no successors
+
+  override predicate isSource() { config.isSource(node) }
+}
+
+/**
+ * Holds if data may flow from `mid` to `node`.
The last step in or out of
+ * a callable is recorded by `cc`.
+ */
+private predicate pathStep(PathNodeMid mid, Node node, CallContext cc, SummaryCtx sc, AccessPath ap) {
+  exists(AccessPath ap0, Node midnode, Configuration conf, LocalCallContext localCC |
+    midnode = mid.getNode() and
+    conf = mid.getConfiguration() and
+    cc = mid.getCallContext() and
+    sc = mid.getSummaryCtx() and
+    localCC = getLocalCallContext(pragma[only_bind_out](cc), getNodeEnclosingCallable(midnode)) and
+    ap0 = mid.getAp()
+  |
+    localFlowBigStep(midnode, node, true, _, conf, localCC) and // value-preserving local step: access path unchanged
+    ap = ap0
+    or
+    localFlowBigStep(midnode, node, false, ap.getFront(), conf, localCC) and // non-value-preserving local step: only from a nil access path
+    ap0 instanceof AccessPathNil
+  )
+  or
+  jumpStep(mid.getNode(), node, mid.getConfiguration()) and // jump step: contexts become `CallContextAny` / `SummaryCtxNone`
+  cc instanceof CallContextAny and
+  sc instanceof SummaryCtxNone and
+  ap = mid.getAp()
+  or
+  additionalJumpStep(mid.getNode(), node, mid.getConfiguration()) and // additional jump step: nil access path before and after
+  cc instanceof CallContextAny and
+  sc instanceof SummaryCtxNone and
+  mid.getAp() instanceof AccessPathNil and
+  ap = TAccessPathNil(getNodeDataFlowType(node))
+  or
+  exists(TypedContent tc | pathStoreStep(mid, node, ap.pop(tc), tc, cc)) and // store: popping `tc` off `ap` gives the access path of `mid`
+  sc = mid.getSummaryCtx()
+  or
+  exists(TypedContent tc | pathReadStep(mid, node, ap.push(tc), tc, cc)) and // read: pushing `tc` onto `ap` gives the access path of `mid`
+  sc = mid.getSummaryCtx()
+  or
+  pathIntoCallable(mid, node, _, cc, sc, _) and ap = mid.getAp() // flow into a callable
+  or
+  pathOutOfCallable(mid, node, cc) and ap = mid.getAp() and sc instanceof SummaryCtxNone // flow out of a callable
+  or
+  pathThroughCallable(mid, node, cc, ap) and sc = mid.getSummaryCtx() // flow through a callable
+}
+/** Holds if `mid` has access path `ap0` with head `tc` and call context `cc`, and the content of `tc` may be read from `mid` to `node`. */
+pragma[nomagic]
+private predicate pathReadStep(
+  PathNodeMid mid, Node node, AccessPath ap0, TypedContent tc, CallContext cc
+) {
+  ap0 = mid.getAp() and
+  tc = ap0.getHead() and
+  Stage4::readStepCand(mid.getNode(), tc.getContent(), node, mid.getConfiguration()) and
+  cc = mid.getCallContext()
+}
+/** Holds if `mid` has access path `ap0` and call context `cc`, and `tc` may be stored from `mid` into `node`. */
+pragma[nomagic]
+private predicate pathStoreStep(
+  PathNodeMid mid, Node node, AccessPath ap0, TypedContent tc, CallContext cc
+) {
+  ap0 = mid.getAp() and
+  Stage4::storeStepCand(mid.getNode(), _, tc, node, _, mid.getConfiguration()) and
+  cc = mid.getCallContext()
+}
+/** Holds if `mid` is at return position `pos` in the `CallContextNoCall` context `innercc`, with approximation `apa` and configuration `config`. */
+private predicate pathOutOfCallable0(
+  PathNodeMid mid, ReturnPosition pos, CallContext innercc, AccessPathApprox apa,
+  Configuration config
+) {
+  pos = getReturnPosition(mid.getNode()) and
+  innercc = mid.getCallContext() and
+  innercc instanceof CallContextNoCall and
+  apa = mid.getAp().getApprox() and
+  config = mid.getConfiguration()
+}
+/** Holds if `mid` may return with kind `kind` to `call`; `cc` is `TReturn` when `reducedViableImplInReturn` holds and `TAnyCallContext` otherwise. */
+pragma[nomagic]
+private predicate pathOutOfCallable1(
+  PathNodeMid mid, DataFlowCall call, ReturnKindExt kind, CallContext cc, AccessPathApprox apa,
+  Configuration config
+) {
+  exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc |
+    pathOutOfCallable0(mid, pos, innercc, apa, config) and
+    c = pos.getCallable() and
+    kind = pos.getKind() and
+    resolveReturn(innercc, c, call)
+  |
+    if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext()
+  )
+}
+/** Gets an out node of `call` for return kind `kind` that is in `Stage4::revFlow` with `apa`. */
+pragma[noinline]
+private Node getAnOutNodeFlow(
+  ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config
+) {
+  result = kind.getAnOutNode(call) and
+  Stage4::revFlow(result, _, _, apa, config)
+}
+
+/**
+ * Holds if data may flow from `mid` to `out`. The last step of this path
+ * is a return from a callable and is recorded by `cc`, if needed.
+ */
+pragma[noinline]
+private predicate pathOutOfCallable(PathNodeMid mid, Node out, CallContext cc) {
+  exists(ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config |
+    pathOutOfCallable1(mid, call, kind, cc, apa, config) and
+    out = getAnOutNodeFlow(kind, call, apa, config)
+  )
+}
+
+/**
+ * Holds if data may flow from `mid` to the `i`th argument of `call` in `cc`.
+ */ +pragma[noinline] +private predicate pathIntoArg( + PathNodeMid mid, int i, CallContext cc, DataFlowCall call, AccessPath ap, AccessPathApprox apa +) { + exists(ArgNode arg | + arg = mid.getNode() and + cc = mid.getCallContext() and + arg.argumentOf(call, i) and + ap = mid.getAp() and + apa = ap.getApprox() + ) +} + +pragma[noinline] +private predicate parameterCand( + DataFlowCallable callable, int i, AccessPathApprox apa, Configuration config +) { + exists(ParamNode p | + Stage4::revFlow(p, _, _, apa, config) and + p.isParameterOf(callable, i) + ) +} + +pragma[nomagic] +private predicate pathIntoCallable0( + PathNodeMid mid, DataFlowCallable callable, int i, CallContext outercc, DataFlowCall call, + AccessPath ap +) { + exists(AccessPathApprox apa | + pathIntoArg(mid, i, outercc, call, ap, apa) and + callable = resolveCall(call, outercc) and + parameterCand(callable, any(int j | j <= i and j >= i), apa, mid.getConfiguration()) + ) +} + +/** + * Holds if data may flow from `mid` to `p` through `call`. The contexts + * before and after entering the callable are `outercc` and `innercc`, + * respectively. + */ +private predicate pathIntoCallable( + PathNodeMid mid, ParamNode p, CallContext outercc, CallContextCall innercc, SummaryCtx sc, + DataFlowCall call +) { + exists(int i, DataFlowCallable callable, AccessPath ap | + pathIntoCallable0(mid, callable, i, outercc, call, ap) and + p.isParameterOf(callable, i) and + ( + sc = TSummaryCtxSome(p, ap) + or + not exists(TSummaryCtxSome(p, ap)) and + sc = TSummaryCtxNone() + ) + | + if recordDataFlowCallSite(call, callable) + then innercc = TSpecificCall(call) + else innercc = TSomeCall() + ) +} + +/** Holds if data may flow from a parameter given by `sc` to a return of kind `kind`. 
*/ +pragma[nomagic] +private predicate paramFlowsThrough( + ReturnKindExt kind, CallContextCall cc, SummaryCtxSome sc, AccessPath ap, AccessPathApprox apa, + Configuration config +) { + exists(PathNodeMid mid, ReturnNodeExt ret, int pos | + mid.getNode() = ret and + kind = ret.getKind() and + cc = mid.getCallContext() and + sc = mid.getSummaryCtx() and + config = mid.getConfiguration() and + ap = mid.getAp() and + apa = ap.getApprox() and + pos = sc.getParameterPos() and + not kind.(ParamUpdateReturnKind).getPosition() = pos + ) +} + +pragma[nomagic] +private predicate pathThroughCallable0( + DataFlowCall call, PathNodeMid mid, ReturnKindExt kind, CallContext cc, AccessPath ap, + AccessPathApprox apa +) { + exists(CallContext innercc, SummaryCtx sc | + pathIntoCallable(mid, _, cc, innercc, sc, call) and + paramFlowsThrough(kind, innercc, sc, ap, apa, unbindConf(mid.getConfiguration())) + ) +} + +/** + * Holds if data may flow from `mid` through a callable to the node `out`. + * The context `cc` is restored to its value prior to entering the callable. + */ +pragma[noinline] +private predicate pathThroughCallable(PathNodeMid mid, Node out, CallContext cc, AccessPath ap) { + exists(DataFlowCall call, ReturnKindExt kind, AccessPathApprox apa | + pathThroughCallable0(call, mid, kind, cc, ap, apa) and + out = getAnOutNodeFlow(kind, call, apa, unbindConf(mid.getConfiguration())) + ) +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. + * + * Will only have results if `configuration` has non-empty sources and + * sinks. + */ +private predicate flowsTo( + PathNode flowsource, PathNodeSink flowsink, Node source, Node sink, Configuration configuration +) { + flowsource.isSource() and + flowsource.getConfiguration() = configuration and + flowsource.getNode() = source and + (flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and + flowsink.getNode() = sink +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. 
+ * + * Will only have results if `configuration` has non-empty sources and + * sinks. + */ +predicate flowsTo(Node source, Node sink, Configuration configuration) { + flowsTo(_, _, source, sink, configuration) +} + +private predicate finalStats(boolean fwd, int nodes, int fields, int conscand, int tuples) { + fwd = true and + nodes = count(Node n0 | exists(PathNode pn | pn.getNode() = n0)) and + fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0)) and + conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap)) and + tuples = count(PathNode pn) + or + fwd = false and + nodes = count(Node n0 | exists(PathNode pn | pn.getNode() = n0 and reach(pn))) and + fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0 and reach(pn))) and + conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap and reach(pn))) and + tuples = count(PathNode pn | reach(pn)) +} + +/** + * INTERNAL: Only for debugging. + * + * Calculates per-stage metrics for data flow. 
+ */ +predicate stageStats( + int n, string stage, int nodes, int fields, int conscand, int tuples, Configuration config +) { + stage = "1 Fwd" and n = 10 and Stage1::stats(true, nodes, fields, conscand, tuples, config) + or + stage = "1 Rev" and n = 15 and Stage1::stats(false, nodes, fields, conscand, tuples, config) + or + stage = "2 Fwd" and n = 20 and Stage2::stats(true, nodes, fields, conscand, tuples, config) + or + stage = "2 Rev" and n = 25 and Stage2::stats(false, nodes, fields, conscand, tuples, config) + or + stage = "3 Fwd" and n = 30 and Stage3::stats(true, nodes, fields, conscand, tuples, config) + or + stage = "3 Rev" and n = 35 and Stage3::stats(false, nodes, fields, conscand, tuples, config) + or + stage = "4 Fwd" and n = 40 and Stage4::stats(true, nodes, fields, conscand, tuples, config) + or + stage = "4 Rev" and n = 45 and Stage4::stats(false, nodes, fields, conscand, tuples, config) + or + stage = "5 Fwd" and n = 50 and finalStats(true, nodes, fields, conscand, tuples) + or + stage = "5 Rev" and n = 55 and finalStats(false, nodes, fields, conscand, tuples) +} + +private module FlowExploration { + private predicate callableStep(DataFlowCallable c1, DataFlowCallable c2, Configuration config) { + exists(Node node1, Node node2 | + jumpStep(node1, node2, config) + or + additionalJumpStep(node1, node2, config) + or + // flow into callable + viableParamArg(_, node2, node1) + or + // flow out of a callable + viableReturnPosOut(_, getReturnPosition(node1), node2) + | + c1 = getNodeEnclosingCallable(node1) and + c2 = getNodeEnclosingCallable(node2) and + c1 != c2 + ) + } + + private predicate interestingCallableSrc(DataFlowCallable c, Configuration config) { + exists(Node n | config.isSource(n) and c = getNodeEnclosingCallable(n)) + or + exists(DataFlowCallable mid | + interestingCallableSrc(mid, config) and callableStep(mid, c, config) + ) + } + + private predicate interestingCallableSink(DataFlowCallable c, Configuration config) { + exists(Node n | 
config.isSink(n) and c = getNodeEnclosingCallable(n)) + or + exists(DataFlowCallable mid | + interestingCallableSink(mid, config) and callableStep(c, mid, config) + ) + } + + private newtype TCallableExt = + TCallable(DataFlowCallable c, Configuration config) { + interestingCallableSrc(c, config) or + interestingCallableSink(c, config) + } or + TCallableSrc() or + TCallableSink() + + private predicate callableExtSrc(TCallableSrc src) { any() } + + private predicate callableExtSink(TCallableSink sink) { any() } + + private predicate callableExtStepFwd(TCallableExt ce1, TCallableExt ce2) { + exists(DataFlowCallable c1, DataFlowCallable c2, Configuration config | + callableStep(c1, c2, config) and + ce1 = TCallable(c1, pragma[only_bind_into](config)) and + ce2 = TCallable(c2, pragma[only_bind_into](config)) + ) + or + exists(Node n, Configuration config | + ce1 = TCallableSrc() and + config.isSource(n) and + ce2 = TCallable(getNodeEnclosingCallable(n), config) + ) + or + exists(Node n, Configuration config | + ce2 = TCallableSink() and + config.isSink(n) and + ce1 = TCallable(getNodeEnclosingCallable(n), config) + ) + } + + private predicate callableExtStepRev(TCallableExt ce1, TCallableExt ce2) { + callableExtStepFwd(ce2, ce1) + } + + private int distSrcExt(TCallableExt c) = + shortestDistances(callableExtSrc/1, callableExtStepFwd/2)(_, c, result) + + private int distSinkExt(TCallableExt c) = + shortestDistances(callableExtSink/1, callableExtStepRev/2)(_, c, result) + + private int distSrc(DataFlowCallable c, Configuration config) { + result = distSrcExt(TCallable(c, config)) - 1 + } + + private int distSink(DataFlowCallable c, Configuration config) { + result = distSinkExt(TCallable(c, config)) - 1 + } + + private newtype TPartialAccessPath = + TPartialNil(DataFlowType t) or + TPartialCons(TypedContent tc, int len) { len in [1 .. 
accessPathLimit()] } + + /** + * Conceptually a list of `TypedContent`s followed by a `Type`, but only the first + * element of the list and its length are tracked. If data flows from a source to + * a given node with a given `AccessPath`, this indicates the sequence of + * dereference operations needed to get from the value in the node to the + * tracked object. The final type indicates the type of the tracked object. + */ + private class PartialAccessPath extends TPartialAccessPath { + abstract string toString(); + + TypedContent getHead() { this = TPartialCons(result, _) } + + int len() { + this = TPartialNil(_) and result = 0 + or + this = TPartialCons(_, result) + } + + DataFlowType getType() { + this = TPartialNil(result) + or + exists(TypedContent head | this = TPartialCons(head, _) | result = head.getContainerType()) + } + } + + private class PartialAccessPathNil extends PartialAccessPath, TPartialNil { + override string toString() { + exists(DataFlowType t | this = TPartialNil(t) | result = concat(": " + ppReprType(t))) + } + } + + private class PartialAccessPathCons extends PartialAccessPath, TPartialCons { + override string toString() { + exists(TypedContent tc, int len | this = TPartialCons(tc, len) | + if len = 1 + then result = "[" + tc.toString() + "]" + else result = "[" + tc.toString() + ", ... (" + len.toString() + ")]" + ) + } + } + + private newtype TRevPartialAccessPath = + TRevPartialNil() or + TRevPartialCons(Content c, int len) { len in [1 .. accessPathLimit()] } + + /** + * Conceptually a list of `Content`s, but only the first + * element of the list and its length are tracked. 
+ */ + private class RevPartialAccessPath extends TRevPartialAccessPath { + abstract string toString(); + + Content getHead() { this = TRevPartialCons(result, _) } + + int len() { + this = TRevPartialNil() and result = 0 + or + this = TRevPartialCons(_, result) + } + } + + private class RevPartialAccessPathNil extends RevPartialAccessPath, TRevPartialNil { + override string toString() { result = "" } + } + + private class RevPartialAccessPathCons extends RevPartialAccessPath, TRevPartialCons { + override string toString() { + exists(Content c, int len | this = TRevPartialCons(c, len) | + if len = 1 + then result = "[" + c.toString() + "]" + else result = "[" + c.toString() + ", ... (" + len.toString() + ")]" + ) + } + } + + private newtype TSummaryCtx1 = + TSummaryCtx1None() or + TSummaryCtx1Param(ParamNode p) + + private newtype TSummaryCtx2 = + TSummaryCtx2None() or + TSummaryCtx2Some(PartialAccessPath ap) + + private newtype TRevSummaryCtx1 = + TRevSummaryCtx1None() or + TRevSummaryCtx1Some(ReturnPosition pos) + + private newtype TRevSummaryCtx2 = + TRevSummaryCtx2None() or + TRevSummaryCtx2Some(RevPartialAccessPath ap) + + private newtype TPartialPathNode = + TPartialPathNodeFwd( + Node node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, PartialAccessPath ap, + Configuration config + ) { + config.isSource(node) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + ap = TPartialNil(getNodeDataFlowType(node)) and + not fullBarrier(node, config) and + exists(config.explorationLimit()) + or + partialPathNodeMk0(node, cc, sc1, sc2, ap, config) and + distSrc(getNodeEnclosingCallable(node), config) <= config.explorationLimit() + } or + TPartialPathNodeRev( + Node node, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2, RevPartialAccessPath ap, + Configuration config + ) { + config.isSink(node) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + ap = TRevPartialNil() and + not fullBarrier(node, config) 
and + exists(config.explorationLimit()) + or + exists(PartialPathNodeRev mid | + revPartialPathStep(mid, node, sc1, sc2, ap, config) and + not clearsContentCached(node, ap.getHead()) and + not fullBarrier(node, config) and + distSink(getNodeEnclosingCallable(node), config) <= config.explorationLimit() + ) + } + + pragma[nomagic] + private predicate partialPathNodeMk0( + Node node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, PartialAccessPath ap, + Configuration config + ) { + exists(PartialPathNodeFwd mid | + partialPathStep(mid, node, cc, sc1, sc2, ap, config) and + not fullBarrier(node, config) and + not clearsContentCached(node, ap.getHead().getContent()) and + if node instanceof CastingNode + then compatibleTypes(getNodeDataFlowType(node), ap.getType()) + else any() + ) + } + + /** + * A `Node` augmented with a call context, an access path, and a configuration. + */ + class PartialPathNode extends TPartialPathNode { + /** Gets a textual representation of this element. */ + string toString() { result = this.getNode().toString() + this.ppAp() } + + /** + * Gets a textual representation of this element, including a textual + * representation of the call context. + */ + string toStringWithContext() { result = this.getNode().toString() + this.ppAp() + this.ppCtx() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.getNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + + /** Gets the underlying `Node`. */ + Node getNode() { none() } + + /** Gets the associated configuration. 
*/ + Configuration getConfiguration() { none() } + + /** Gets a successor of this node, if any. */ + PartialPathNode getASuccessor() { none() } + + /** + * Gets the approximate distance to the nearest source measured in number + * of interprocedural steps. + */ + int getSourceDistance() { + result = distSrc(getNodeEnclosingCallable(this.getNode()), this.getConfiguration()) + } + + /** + * Gets the approximate distance to the nearest sink measured in number + * of interprocedural steps. + */ + int getSinkDistance() { + result = distSink(getNodeEnclosingCallable(this.getNode()), this.getConfiguration()) + } + + private string ppAp() { + exists(string s | + s = this.(PartialPathNodeFwd).getAp().toString() or + s = this.(PartialPathNodeRev).getAp().toString() + | + if s = "" then result = "" else result = " " + s + ) + } + + private string ppCtx() { + result = " <" + this.(PartialPathNodeFwd).getCallContext().toString() + ">" + } + + /** Holds if this is a source in a forward-flow path. */ + predicate isFwdSource() { this.(PartialPathNodeFwd).isSource() } + + /** Holds if this is a sink in a reverse-flow path. */ + predicate isRevSink() { this.(PartialPathNodeRev).isSink() } + } + + /** + * Provides the query predicates needed to include a graph in a path-problem query. + */ + module PartialPathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. 
*/ + query predicate edges(PartialPathNode a, PartialPathNode b) { a.getASuccessor() = b } + } + + private class PartialPathNodeFwd extends PartialPathNode, TPartialPathNodeFwd { + Node node; + CallContext cc; + TSummaryCtx1 sc1; + TSummaryCtx2 sc2; + PartialAccessPath ap; + Configuration config; + + PartialPathNodeFwd() { this = TPartialPathNodeFwd(node, cc, sc1, sc2, ap, config) } + + override Node getNode() { result = node } + + CallContext getCallContext() { result = cc } + + TSummaryCtx1 getSummaryCtx1() { result = sc1 } + + TSummaryCtx2 getSummaryCtx2() { result = sc2 } + + PartialAccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + override PartialPathNodeFwd getASuccessor() { + partialPathStep(this, result.getNode(), result.getCallContext(), result.getSummaryCtx1(), + result.getSummaryCtx2(), result.getAp(), result.getConfiguration()) + } + + predicate isSource() { + config.isSource(node) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + ap instanceof TPartialNil + } + } + + private class PartialPathNodeRev extends PartialPathNode, TPartialPathNodeRev { + Node node; + TRevSummaryCtx1 sc1; + TRevSummaryCtx2 sc2; + RevPartialAccessPath ap; + Configuration config; + + PartialPathNodeRev() { this = TPartialPathNodeRev(node, sc1, sc2, ap, config) } + + override Node getNode() { result = node } + + TRevSummaryCtx1 getSummaryCtx1() { result = sc1 } + + TRevSummaryCtx2 getSummaryCtx2() { result = sc2 } + + RevPartialAccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + override PartialPathNodeRev getASuccessor() { + revPartialPathStep(result, this.getNode(), this.getSummaryCtx1(), this.getSummaryCtx2(), + this.getAp(), this.getConfiguration()) + } + + predicate isSink() { + config.isSink(node) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + ap = TRevPartialNil() + } + } + + private 
predicate partialPathStep( + PartialPathNodeFwd mid, Node node, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, + PartialAccessPath ap, Configuration config + ) { + not isUnreachableInCallCached(node, cc.(CallContextSpecificCall).getCall()) and + ( + localFlowStep(mid.getNode(), node, config) and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalLocalFlowStep(mid.getNode(), node, config) and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(getNodeDataFlowType(node)) and + config = mid.getConfiguration() + ) + or + jumpStep(mid.getNode(), node, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalJumpStep(mid.getNode(), node, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(getNodeDataFlowType(node)) and + config = mid.getConfiguration() + or + partialPathStoreStep(mid, _, _, node, ap) and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + config = mid.getConfiguration() + or + exists(PartialAccessPath ap0, TypedContent tc | + partialPathReadStep(mid, ap0, tc, node, cc, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + apConsFwd(ap, tc, ap0, config) and + compatibleTypes(ap.getType(), getNodeDataFlowType(node)) + ) + or + partialPathIntoCallable(mid, node, _, cc, sc1, sc2, _, ap, config) + or + partialPathOutOfCallable(mid, node, cc, ap, config) and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() + or + partialPathThroughCallable(mid, node, cc, ap, config) and + sc1 = 
mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() + } + + bindingset[result, i] + private int unbindInt(int i) { i <= result and i >= result } + + pragma[inline] + private predicate partialPathStoreStep( + PartialPathNodeFwd mid, PartialAccessPath ap1, TypedContent tc, Node node, PartialAccessPath ap2 + ) { + exists(Node midNode, DataFlowType contentType | + midNode = mid.getNode() and + ap1 = mid.getAp() and + store(midNode, tc, node, contentType) and + ap2.getHead() = tc and + ap2.len() = unbindInt(ap1.len() + 1) and + compatibleTypes(ap1.getType(), contentType) + ) + } + + pragma[nomagic] + private predicate apConsFwd( + PartialAccessPath ap1, TypedContent tc, PartialAccessPath ap2, Configuration config + ) { + exists(PartialPathNodeFwd mid | + partialPathStoreStep(mid, ap1, tc, _, ap2) and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate partialPathReadStep( + PartialPathNodeFwd mid, PartialAccessPath ap, TypedContent tc, Node node, CallContext cc, + Configuration config + ) { + exists(Node midNode | + midNode = mid.getNode() and + ap = mid.getAp() and + read(midNode, tc.getContent(), node) and + ap.getHead() = tc and + config = mid.getConfiguration() and + cc = mid.getCallContext() + ) + } + + private predicate partialPathOutOfCallable0( + PartialPathNodeFwd mid, ReturnPosition pos, CallContext innercc, PartialAccessPath ap, + Configuration config + ) { + pos = getReturnPosition(mid.getNode()) and + innercc = mid.getCallContext() and + innercc instanceof CallContextNoCall and + ap = mid.getAp() and + config = mid.getConfiguration() + } + + pragma[nomagic] + private predicate partialPathOutOfCallable1( + PartialPathNodeFwd mid, DataFlowCall call, ReturnKindExt kind, CallContext cc, + PartialAccessPath ap, Configuration config + ) { + exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc | + partialPathOutOfCallable0(mid, pos, innercc, ap, config) and + c = pos.getCallable() and + kind = pos.getKind() and + 
resolveReturn(innercc, c, call) + | + if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext() + ) + } + + private predicate partialPathOutOfCallable( + PartialPathNodeFwd mid, Node out, CallContext cc, PartialAccessPath ap, Configuration config + ) { + exists(ReturnKindExt kind, DataFlowCall call | + partialPathOutOfCallable1(mid, call, kind, cc, ap, config) + | + out = kind.getAnOutNode(call) + ) + } + + pragma[noinline] + private predicate partialPathIntoArg( + PartialPathNodeFwd mid, int i, CallContext cc, DataFlowCall call, PartialAccessPath ap, + Configuration config + ) { + exists(ArgNode arg | + arg = mid.getNode() and + cc = mid.getCallContext() and + arg.argumentOf(call, i) and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate partialPathIntoCallable0( + PartialPathNodeFwd mid, DataFlowCallable callable, int i, CallContext outercc, + DataFlowCall call, PartialAccessPath ap, Configuration config + ) { + partialPathIntoArg(mid, i, outercc, call, ap, config) and + callable = resolveCall(call, outercc) + } + + private predicate partialPathIntoCallable( + PartialPathNodeFwd mid, ParamNode p, CallContext outercc, CallContextCall innercc, + TSummaryCtx1 sc1, TSummaryCtx2 sc2, DataFlowCall call, PartialAccessPath ap, + Configuration config + ) { + exists(int i, DataFlowCallable callable | + partialPathIntoCallable0(mid, callable, i, outercc, call, ap, config) and + p.isParameterOf(callable, i) and + sc1 = TSummaryCtx1Param(p) and + sc2 = TSummaryCtx2Some(ap) + | + if recordDataFlowCallSite(call, callable) + then innercc = TSpecificCall(call) + else innercc = TSomeCall() + ) + } + + pragma[nomagic] + private predicate paramFlowsThroughInPartialPath( + ReturnKindExt kind, CallContextCall cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, + PartialAccessPath ap, Configuration config + ) { + exists(PartialPathNodeFwd mid, ReturnNodeExt ret | + mid.getNode() = ret and + kind = 
ret.getKind() and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + config = mid.getConfiguration() and + ap = mid.getAp() + ) + } + + pragma[noinline] + private predicate partialPathThroughCallable0( + DataFlowCall call, PartialPathNodeFwd mid, ReturnKindExt kind, CallContext cc, + PartialAccessPath ap, Configuration config + ) { + exists(ParamNode p, CallContext innercc, TSummaryCtx1 sc1, TSummaryCtx2 sc2 | + partialPathIntoCallable(mid, p, cc, innercc, sc1, sc2, call, _, config) and + paramFlowsThroughInPartialPath(kind, innercc, sc1, sc2, ap, config) + ) + } + + private predicate partialPathThroughCallable( + PartialPathNodeFwd mid, Node out, CallContext cc, PartialAccessPath ap, Configuration config + ) { + exists(DataFlowCall call, ReturnKindExt kind | + partialPathThroughCallable0(call, mid, kind, cc, ap, config) and + out = kind.getAnOutNode(call) + ) + } + + private predicate revPartialPathStep( + PartialPathNodeRev mid, Node node, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2, + RevPartialAccessPath ap, Configuration config + ) { + localFlowStep(node, mid.getNode(), config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalLocalFlowStep(node, mid.getNode(), config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + mid.getAp() instanceof RevPartialAccessPathNil and + ap = TRevPartialNil() and + config = mid.getConfiguration() + or + jumpStep(node, mid.getNode(), config) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalJumpStep(node, mid.getNode(), config) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + mid.getAp() instanceof RevPartialAccessPathNil and + ap = TRevPartialNil() and + config = mid.getConfiguration() + or + revPartialPathReadStep(mid, _, _, node, ap) and + sc1 = 
mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + config = mid.getConfiguration() + or + exists(RevPartialAccessPath ap0, Content c | + revPartialPathStoreStep(mid, ap0, c, node, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + apConsRev(ap, c, ap0, config) + ) + or + exists(ParamNode p | + mid.getNode() = p and + viableParamArg(_, p, node) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + or + exists(ReturnPosition pos | + revPartialPathIntoReturn(mid, pos, sc1, sc2, _, ap, config) and + pos = getReturnPosition(node) + ) + or + revPartialPathThroughCallable(mid, node, ap, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() + } + + pragma[inline] + private predicate revPartialPathReadStep( + PartialPathNodeRev mid, RevPartialAccessPath ap1, Content c, Node node, RevPartialAccessPath ap2 + ) { + exists(Node midNode | + midNode = mid.getNode() and + ap1 = mid.getAp() and + read(node, c, midNode) and + ap2.getHead() = c and + ap2.len() = unbindInt(ap1.len() + 1) + ) + } + + pragma[nomagic] + private predicate apConsRev( + RevPartialAccessPath ap1, Content c, RevPartialAccessPath ap2, Configuration config + ) { + exists(PartialPathNodeRev mid | + revPartialPathReadStep(mid, ap1, c, _, ap2) and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate revPartialPathStoreStep( + PartialPathNodeRev mid, RevPartialAccessPath ap, Content c, Node node, Configuration config + ) { + exists(Node midNode, TypedContent tc | + midNode = mid.getNode() and + ap = mid.getAp() and + store(node, tc, midNode, _) and + ap.getHead() = c and + config = mid.getConfiguration() and + tc.getContent() = c + ) + } + + pragma[nomagic] + private predicate revPartialPathIntoReturn( + PartialPathNodeRev mid, ReturnPosition pos, TRevSummaryCtx1Some sc1, 
TRevSummaryCtx2Some sc2, + DataFlowCall call, RevPartialAccessPath ap, Configuration config + ) { + exists(Node out | + mid.getNode() = out and + viableReturnPosOut(call, pos, out) and + sc1 = TRevSummaryCtx1Some(pos) and + sc2 = TRevSummaryCtx2Some(ap) and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate revPartialPathFlowsThrough( + int pos, TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2, RevPartialAccessPath ap, + Configuration config + ) { + exists(PartialPathNodeRev mid, ParamNode p | + mid.getNode() = p and + p.isParameterOf(_, pos) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate revPartialPathThroughCallable0( + DataFlowCall call, PartialPathNodeRev mid, int pos, RevPartialAccessPath ap, + Configuration config + ) { + exists(TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2 | + revPartialPathIntoReturn(mid, _, sc1, sc2, call, _, config) and + revPartialPathFlowsThrough(pos, sc1, sc2, ap, config) + ) + } + + pragma[nomagic] + private predicate revPartialPathThroughCallable( + PartialPathNodeRev mid, ArgNode node, RevPartialAccessPath ap, Configuration config + ) { + exists(DataFlowCall call, int pos | + revPartialPathThroughCallable0(call, mid, pos, ap, config) and + node.argumentOf(call, pos) + ) + } +} + +import FlowExploration + +private predicate partialFlow( + PartialPathNode source, PartialPathNode node, Configuration configuration +) { + source.getConfiguration() = configuration and + source.isFwdSource() and + node = source.getASuccessor+() +} + +private predicate revPartialFlow( + PartialPathNode node, PartialPathNode sink, Configuration configuration +) { + sink.getConfiguration() = configuration and + sink.isRevSink() and + node.getASuccessor+() = sink +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/DataFlowImplCommon.qll 
b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/DataFlowImplCommon.qll new file mode 100644 index 00000000000..462e89ac9ed --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/DataFlowImplCommon.qll @@ -0,0 +1,1232 @@ +private import DataFlowImplSpecific::Private +private import DataFlowImplSpecific::Public +import Cached + +/** + * The cost limits for the `AccessPathFront` to `AccessPathApprox` expansion. + * + * `apLimit` bounds the acceptable fan-out, and `tupleLimit` bounds the + * estimated per-`AccessPathFront` tuple cost. Access paths exceeding both of + * these limits are represented with lower precision during pruning. + */ +predicate accessPathApproxCostLimits(int apLimit, int tupleLimit) { + apLimit = 10 and + tupleLimit = 10000 +} + +/** + * The cost limits for the `AccessPathApprox` to `AccessPath` expansion. + * + * `apLimit` bounds the acceptable fan-out, and `tupleLimit` bounds the + * estimated per-`AccessPathApprox` tuple cost. Access paths exceeding both of + * these limits are represented with lower precision. + */ +predicate accessPathCostLimits(int apLimit, int tupleLimit) { + apLimit = 5 and + tupleLimit = 1000 +} + +/** + * Provides a simple data-flow analysis for resolving lambda calls. The analysis + * currently excludes read-steps, store-steps, and flow-through. + * + * The analysis uses non-linear recursion: When computing a flow path in or out + * of a call, we use the results of the analysis recursively to resolve lambda + * calls. For this reason, we cannot reuse the code from `DataFlowImpl.qll` directly. 
+ */ +private module LambdaFlow { + private predicate viableParamNonLambda(DataFlowCall call, int i, ParamNode p) { + p.isParameterOf(viableCallable(call), i) + } + + private predicate viableParamLambda(DataFlowCall call, int i, ParamNode p) { + p.isParameterOf(viableCallableLambda(call, _), i) + } + + private predicate viableParamArgNonLambda(DataFlowCall call, ParamNode p, ArgNode arg) { + exists(int i | + viableParamNonLambda(call, i, p) and + arg.argumentOf(call, i) + ) + } + + private predicate viableParamArgLambda(DataFlowCall call, ParamNode p, ArgNode arg) { + exists(int i | + viableParamLambda(call, i, p) and + arg.argumentOf(call, i) + ) + } + + private newtype TReturnPositionSimple = + TReturnPositionSimple0(DataFlowCallable c, ReturnKind kind) { + exists(ReturnNode ret | + c = getNodeEnclosingCallable(ret) and + kind = ret.getKind() + ) + } + + pragma[noinline] + private TReturnPositionSimple getReturnPositionSimple(ReturnNode ret, ReturnKind kind) { + result = TReturnPositionSimple0(getNodeEnclosingCallable(ret), kind) + } + + pragma[nomagic] + private TReturnPositionSimple viableReturnPosNonLambda(DataFlowCall call, ReturnKind kind) { + result = TReturnPositionSimple0(viableCallable(call), kind) + } + + pragma[nomagic] + private TReturnPositionSimple viableReturnPosLambda( + DataFlowCall call, DataFlowCallOption lastCall, ReturnKind kind + ) { + result = TReturnPositionSimple0(viableCallableLambda(call, lastCall), kind) + } + + private predicate viableReturnPosOutNonLambda( + DataFlowCall call, TReturnPositionSimple pos, OutNode out + ) { + exists(ReturnKind kind | + pos = viableReturnPosNonLambda(call, kind) and + out = getAnOutNode(call, kind) + ) + } + + private predicate viableReturnPosOutLambda( + DataFlowCall call, DataFlowCallOption lastCall, TReturnPositionSimple pos, OutNode out + ) { + exists(ReturnKind kind | + pos = viableReturnPosLambda(call, lastCall, kind) and + out = getAnOutNode(call, kind) + ) + } + + /** + * Holds if data can flow 
(inter-procedurally) from `node` (of type `t`) to + * the lambda call `lambdaCall`. + * + * The parameter `toReturn` indicates whether the path from `node` to + * `lambdaCall` goes through a return, and `toJump` whether the path goes + * through a jump step. + * + * The call context `lastCall` records the last call on the path from `node` + * to `lambdaCall`, if any. That is, `lastCall` is able to target the enclosing + * callable of `lambdaCall`. + */ + pragma[nomagic] + predicate revLambdaFlow( + DataFlowCall lambdaCall, LambdaCallKind kind, Node node, DataFlowType t, boolean toReturn, + boolean toJump, DataFlowCallOption lastCall + ) { + revLambdaFlow0(lambdaCall, kind, node, t, toReturn, toJump, lastCall) and + if castNode(node) or node instanceof ArgNode or node instanceof ReturnNode + then compatibleTypes(t, getNodeDataFlowType(node)) + else any() + } + + pragma[nomagic] + predicate revLambdaFlow0( + DataFlowCall lambdaCall, LambdaCallKind kind, Node node, DataFlowType t, boolean toReturn, + boolean toJump, DataFlowCallOption lastCall + ) { + lambdaCall(lambdaCall, kind, node) and + t = getNodeDataFlowType(node) and + toReturn = false and + toJump = false and + lastCall = TDataFlowCallNone() + or + // local flow + exists(Node mid, DataFlowType t0 | + revLambdaFlow(lambdaCall, kind, mid, t0, toReturn, toJump, lastCall) + | + simpleLocalFlowStep(node, mid) and + t = t0 + or + exists(boolean preservesValue | + additionalLambdaFlowStep(node, mid, preservesValue) and + getNodeEnclosingCallable(node) = getNodeEnclosingCallable(mid) + | + preservesValue = false and + t = getNodeDataFlowType(node) + or + preservesValue = true and + t = t0 + ) + ) + or + // jump step + exists(Node mid, DataFlowType t0 | + revLambdaFlow(lambdaCall, kind, mid, t0, _, _, _) and + toReturn = false and + toJump = true and + lastCall = TDataFlowCallNone() + | + jumpStepCached(node, mid) and + t = t0 + or + exists(boolean preservesValue | + additionalLambdaFlowStep(node, mid, preservesValue) 
and + getNodeEnclosingCallable(node) != getNodeEnclosingCallable(mid) + | + preservesValue = false and + t = getNodeDataFlowType(node) + or + preservesValue = true and + t = t0 + ) + ) + or + // flow into a callable + exists(ParamNode p, DataFlowCallOption lastCall0, DataFlowCall call | + revLambdaFlowIn(lambdaCall, kind, p, t, toJump, lastCall0) and + ( + if lastCall0 = TDataFlowCallNone() and toJump = false + then lastCall = TDataFlowCallSome(call) + else lastCall = lastCall0 + ) and + toReturn = false + | + viableParamArgNonLambda(call, p, node) + or + viableParamArgLambda(call, p, node) // non-linear recursion + ) + or + // flow out of a callable + exists(TReturnPositionSimple pos | + revLambdaFlowOut(lambdaCall, kind, pos, t, toJump, lastCall) and + getReturnPositionSimple(node, node.(ReturnNode).getKind()) = pos and + toReturn = true + ) + } + + pragma[nomagic] + predicate revLambdaFlowOutLambdaCall( + DataFlowCall lambdaCall, LambdaCallKind kind, OutNode out, DataFlowType t, boolean toJump, + DataFlowCall call, DataFlowCallOption lastCall + ) { + revLambdaFlow(lambdaCall, kind, out, t, _, toJump, lastCall) and + exists(ReturnKindExt rk | + out = rk.getAnOutNode(call) and + lambdaCall(call, _, _) + ) + } + + pragma[nomagic] + predicate revLambdaFlowOut( + DataFlowCall lambdaCall, LambdaCallKind kind, TReturnPositionSimple pos, DataFlowType t, + boolean toJump, DataFlowCallOption lastCall + ) { + exists(DataFlowCall call, OutNode out | + revLambdaFlow(lambdaCall, kind, out, t, _, toJump, lastCall) and + viableReturnPosOutNonLambda(call, pos, out) + or + // non-linear recursion + revLambdaFlowOutLambdaCall(lambdaCall, kind, out, t, toJump, call, lastCall) and + viableReturnPosOutLambda(call, _, pos, out) + ) + } + + pragma[nomagic] + predicate revLambdaFlowIn( + DataFlowCall lambdaCall, LambdaCallKind kind, ParamNode p, DataFlowType t, boolean toJump, + DataFlowCallOption lastCall + ) { + revLambdaFlow(lambdaCall, kind, p, t, false, toJump, lastCall) + } +} + 
+private DataFlowCallable viableCallableExt(DataFlowCall call) { + result = viableCallable(call) + or + result = viableCallableLambda(call, _) +} + +cached +private module Cached { + /** + * If needed, call this predicate from `DataFlowImplSpecific.qll` in order to + * force a stage-dependency on the `DataFlowImplCommon.qll` stage and thereby + * collapsing the two stages. + */ + cached + predicate forceCachingInSameStage() { any() } + + cached + predicate nodeEnclosingCallable(Node n, DataFlowCallable c) { c = n.getEnclosingCallable() } + + cached + predicate callEnclosingCallable(DataFlowCall call, DataFlowCallable c) { + c = call.getEnclosingCallable() + } + + cached + predicate nodeDataFlowType(Node n, DataFlowType t) { t = getNodeType(n) } + + cached + predicate jumpStepCached(Node node1, Node node2) { jumpStep(node1, node2) } + + cached + predicate clearsContentCached(Node n, Content c) { clearsContent(n, c) } + + cached + predicate isUnreachableInCallCached(Node n, DataFlowCall call) { isUnreachableInCall(n, call) } + + cached + predicate outNodeExt(Node n) { + n instanceof OutNode + or + n.(PostUpdateNode).getPreUpdateNode() instanceof ArgNode + } + + cached + predicate hiddenNode(Node n) { nodeIsHidden(n) } + + cached + OutNodeExt getAnOutNodeExt(DataFlowCall call, ReturnKindExt k) { + result = getAnOutNode(call, k.(ValueReturnKind).getKind()) + or + exists(ArgNode arg | + result.(PostUpdateNode).getPreUpdateNode() = arg and + arg.argumentOf(call, k.(ParamUpdateReturnKind).getPosition()) + ) + } + + cached + predicate returnNodeExt(Node n, ReturnKindExt k) { + k = TValueReturn(n.(ReturnNode).getKind()) + or + exists(ParamNode p, int pos | + parameterValueFlowsToPreUpdate(p, n) and + p.isParameterOf(_, pos) and + k = TParamUpdate(pos) + ) + } + + cached + predicate castNode(Node n) { n instanceof CastNode } + + cached + predicate castingNode(Node n) { + castNode(n) or + n instanceof ParamNode or + n instanceof OutNodeExt or + // For reads, `x.f`, we want to
check that the tracked type after the read (which + // is obtained by popping the head of the access path stack) is compatible with + // the type of `x.f`. + read(_, _, n) + } + + cached + predicate parameterNode(Node n, DataFlowCallable c, int i) { + n.(ParameterNode).isParameterOf(c, i) + } + + cached + predicate argumentNode(Node n, DataFlowCall call, int pos) { + n.(ArgumentNode).argumentOf(call, pos) + } + + /** + * Gets a viable target for the lambda call `call`. + * + * `lastCall` records the call required to reach `call` in order for the result + * to be a viable target, if any. + */ + cached + DataFlowCallable viableCallableLambda(DataFlowCall call, DataFlowCallOption lastCall) { + exists(Node creation, LambdaCallKind kind | + LambdaFlow::revLambdaFlow(call, kind, creation, _, _, _, lastCall) and + lambdaCreation(creation, kind, result) + ) + } + + /** + * Holds if `p` is the `i`th parameter of a viable dispatch target of `call`. + * The instance parameter is considered to have index `-1`. + */ + pragma[nomagic] + private predicate viableParam(DataFlowCall call, int i, ParamNode p) { + p.isParameterOf(viableCallableExt(call), i) + } + + /** + * Holds if `arg` is a possible argument to `p` in `call`, taking virtual + * dispatch into account. + */ + cached + predicate viableParamArg(DataFlowCall call, ParamNode p, ArgNode arg) { + exists(int i | + viableParam(call, i, p) and + arg.argumentOf(call, i) and + compatibleTypes(getNodeDataFlowType(arg), getNodeDataFlowType(p)) + ) + } + + pragma[nomagic] + private ReturnPosition viableReturnPos(DataFlowCall call, ReturnKindExt kind) { + viableCallableExt(call) = result.getCallable() and + kind = result.getKind() + } + + /** + * Holds if a value at return position `pos` can be returned to `out` via `call`, + * taking virtual dispatch into account. 
+ */ + cached + predicate viableReturnPosOut(DataFlowCall call, ReturnPosition pos, Node out) { + exists(ReturnKindExt kind | + pos = viableReturnPos(call, kind) and + out = kind.getAnOutNode(call) + ) + } + + /** Provides predicates for calculating flow-through summaries. */ + private module FlowThrough { + /** + * The first flow-through approximation: + * + * - Input access paths are abstracted with a Boolean parameter + * that indicates (non-)emptiness. + */ + private module Cand { + /** + * Holds if `p` can flow to `node` in the same callable using only + * value-preserving steps. + * + * `read` indicates whether it is contents of `p` that can flow to `node`. + */ + pragma[nomagic] + private predicate parameterValueFlowCand(ParamNode p, Node node, boolean read) { + p = node and + read = false + or + // local flow + exists(Node mid | + parameterValueFlowCand(p, mid, read) and + simpleLocalFlowStep(mid, node) + ) + or + // read + exists(Node mid | + parameterValueFlowCand(p, mid, false) and + read(mid, _, node) and + read = true + ) + or + // flow through: no prior read + exists(ArgNode arg | + parameterValueFlowArgCand(p, arg, false) and + argumentValueFlowsThroughCand(arg, node, read) + ) + or + // flow through: no read inside method + exists(ArgNode arg | + parameterValueFlowArgCand(p, arg, read) and + argumentValueFlowsThroughCand(arg, node, false) + ) + } + + pragma[nomagic] + private predicate parameterValueFlowArgCand(ParamNode p, ArgNode arg, boolean read) { + parameterValueFlowCand(p, arg, read) + } + + pragma[nomagic] + predicate parameterValueFlowsToPreUpdateCand(ParamNode p, PostUpdateNode n) { + parameterValueFlowCand(p, n.getPreUpdateNode(), false) + } + + /** + * Holds if `p` can flow to a return node of kind `kind` in the same + * callable using only value-preserving steps, not taking call contexts + * into account. + * + * `read` indicates whether it is contents of `p` that can flow to the return + * node. 
+ */ + predicate parameterValueFlowReturnCand(ParamNode p, ReturnKind kind, boolean read) { + exists(ReturnNode ret | + parameterValueFlowCand(p, ret, read) and + kind = ret.getKind() + ) + } + + pragma[nomagic] + private predicate argumentValueFlowsThroughCand0( + DataFlowCall call, ArgNode arg, ReturnKind kind, boolean read + ) { + exists(ParamNode param | viableParamArg(call, param, arg) | + parameterValueFlowReturnCand(param, kind, read) + ) + } + + /** + * Holds if `arg` flows to `out` through a call using only value-preserving steps, + * not taking call contexts into account. + * + * `read` indicates whether it is contents of `arg` that can flow to `out`. + */ + predicate argumentValueFlowsThroughCand(ArgNode arg, Node out, boolean read) { + exists(DataFlowCall call, ReturnKind kind | + argumentValueFlowsThroughCand0(call, arg, kind, read) and + out = getAnOutNode(call, kind) + ) + } + + predicate cand(ParamNode p, Node n) { + parameterValueFlowCand(p, n, _) and + ( + parameterValueFlowReturnCand(p, _, _) + or + parameterValueFlowsToPreUpdateCand(p, _) + ) + } + } + + /** + * The final flow-through calculation: + * + * - Calculated flow is either value-preserving (`read = TReadStepTypesNone()`) + * or summarized as a single read step with before and after types recorded + * in the `ReadStepTypesOption` parameter. + * - Types are checked using the `compatibleTypes()` relation. + */ + private module Final { + /** + * Holds if `p` can flow to `node` in the same callable using only + * value-preserving steps and possibly a single read step, not taking + * call contexts into account. + * + * If a read step was taken, then `read` captures the `Content`, the + * container type, and the content type. 
+ */ + predicate parameterValueFlow(ParamNode p, Node node, ReadStepTypesOption read) { + parameterValueFlow0(p, node, read) and + if node instanceof CastingNode + then + // normal flow through + read = TReadStepTypesNone() and + compatibleTypes(getNodeDataFlowType(p), getNodeDataFlowType(node)) + or + // getter + compatibleTypes(read.getContentType(), getNodeDataFlowType(node)) + else any() + } + + pragma[nomagic] + private predicate parameterValueFlow0(ParamNode p, Node node, ReadStepTypesOption read) { + p = node and + Cand::cand(p, _) and + read = TReadStepTypesNone() + or + // local flow + exists(Node mid | + parameterValueFlow(p, mid, read) and + simpleLocalFlowStep(mid, node) + ) + or + // read + exists(Node mid | + parameterValueFlow(p, mid, TReadStepTypesNone()) and + readStepWithTypes(mid, read.getContainerType(), read.getContent(), node, + read.getContentType()) and + Cand::parameterValueFlowReturnCand(p, _, true) and + compatibleTypes(getNodeDataFlowType(p), read.getContainerType()) + ) + or + parameterValueFlow0_0(TReadStepTypesNone(), p, node, read) + } + + pragma[nomagic] + private predicate parameterValueFlow0_0( + ReadStepTypesOption mustBeNone, ParamNode p, Node node, ReadStepTypesOption read + ) { + // flow through: no prior read + exists(ArgNode arg | + parameterValueFlowArg(p, arg, mustBeNone) and + argumentValueFlowsThrough(arg, read, node) + ) + or + // flow through: no read inside method + exists(ArgNode arg | + parameterValueFlowArg(p, arg, read) and + argumentValueFlowsThrough(arg, mustBeNone, node) + ) + } + + pragma[nomagic] + private predicate parameterValueFlowArg(ParamNode p, ArgNode arg, ReadStepTypesOption read) { + parameterValueFlow(p, arg, read) and + Cand::argumentValueFlowsThroughCand(arg, _, _) + } + + pragma[nomagic] + private predicate argumentValueFlowsThrough0( + DataFlowCall call, ArgNode arg, ReturnKind kind, ReadStepTypesOption read + ) { + exists(ParamNode param | viableParamArg(call, param, arg) | + 
parameterValueFlowReturn(param, kind, read) + ) + } + + /** + * Holds if `arg` flows to `out` through a call using only + * value-preserving steps and possibly a single read step, not taking + * call contexts into account. + * + * If a read step was taken, then `read` captures the `Content`, the + * container type, and the content type. + */ + pragma[nomagic] + predicate argumentValueFlowsThrough(ArgNode arg, ReadStepTypesOption read, Node out) { + exists(DataFlowCall call, ReturnKind kind | + argumentValueFlowsThrough0(call, arg, kind, read) and + out = getAnOutNode(call, kind) + | + // normal flow through + read = TReadStepTypesNone() and + compatibleTypes(getNodeDataFlowType(arg), getNodeDataFlowType(out)) + or + // getter + compatibleTypes(getNodeDataFlowType(arg), read.getContainerType()) and + compatibleTypes(read.getContentType(), getNodeDataFlowType(out)) + ) + } + + /** + * Holds if `arg` flows to `out` through a call using only + * value-preserving steps and a single read step, not taking call + * contexts into account, thus representing a getter-step. + */ + predicate getterStep(ArgNode arg, Content c, Node out) { + argumentValueFlowsThrough(arg, TReadStepTypesSome(_, c, _), out) + } + + /** + * Holds if `p` can flow to a return node of kind `kind` in the same + * callable using only value-preserving steps and possibly a single read + * step. + * + * If a read step was taken, then `read` captures the `Content`, the + * container type, and the content type. + */ + private predicate parameterValueFlowReturn( + ParamNode p, ReturnKind kind, ReadStepTypesOption read + ) { + exists(ReturnNode ret | + parameterValueFlow(p, ret, read) and + kind = ret.getKind() + ) + } + } + + import Final + } + + import FlowThrough + + cached + private module DispatchWithCallContext { + /** + * Holds if the set of viable implementations that can be called by `call` + * might be improved by knowing the call context. 
+ */ + pragma[nomagic] + private predicate mayBenefitFromCallContextExt(DataFlowCall call, DataFlowCallable callable) { + mayBenefitFromCallContext(call, callable) + or + callEnclosingCallable(call, callable) and + exists(viableCallableLambda(call, TDataFlowCallSome(_))) + } + + /** + * Gets a viable dispatch target of `call` in the context `ctx`. This is + * restricted to those `call`s for which a context might make a difference. + */ + pragma[nomagic] + private DataFlowCallable viableImplInCallContextExt(DataFlowCall call, DataFlowCall ctx) { + result = viableImplInCallContext(call, ctx) + or + result = viableCallableLambda(call, TDataFlowCallSome(ctx)) + or + exists(DataFlowCallable enclosing | + mayBenefitFromCallContextExt(call, enclosing) and + enclosing = viableCallableExt(ctx) and + result = viableCallableLambda(call, TDataFlowCallNone()) + ) + } + + /** + * Holds if the call context `ctx` reduces the set of viable run-time + * dispatch targets of call `call` in `c`. + */ + cached + predicate reducedViableImplInCallContext(DataFlowCall call, DataFlowCallable c, DataFlowCall ctx) { + exists(int tgts, int ctxtgts | + mayBenefitFromCallContextExt(call, c) and + c = viableCallableExt(ctx) and + ctxtgts = count(viableImplInCallContextExt(call, ctx)) and + tgts = strictcount(viableCallableExt(call)) and + ctxtgts < tgts + ) + } + + /** + * Gets a viable run-time dispatch target for the call `call` in the + * context `ctx`. This is restricted to those calls for which a context + * makes a difference. + */ + cached + DataFlowCallable prunedViableImplInCallContext(DataFlowCall call, DataFlowCall ctx) { + result = viableImplInCallContextExt(call, ctx) and + reducedViableImplInCallContext(call, _, ctx) + } + + /** + * Holds if flow returning from callable `c` to call `call` might return + * further and if this path restricts the set of call sites that can be + * returned to. 
+ */ + cached + predicate reducedViableImplInReturn(DataFlowCallable c, DataFlowCall call) { + exists(int tgts, int ctxtgts | + mayBenefitFromCallContextExt(call, _) and + c = viableCallableExt(call) and + ctxtgts = count(DataFlowCall ctx | c = viableImplInCallContextExt(call, ctx)) and + tgts = strictcount(DataFlowCall ctx | callEnclosingCallable(call, viableCallableExt(ctx))) and + ctxtgts < tgts + ) + } + + /** + * Gets a viable run-time dispatch target for the call `call` in the + * context `ctx`. This is restricted to those calls and results for which + * the return flow from the result to `call` restricts the possible context + * `ctx`. + */ + cached + DataFlowCallable prunedViableImplInCallContextReverse(DataFlowCall call, DataFlowCall ctx) { + result = viableImplInCallContextExt(call, ctx) and + reducedViableImplInReturn(result, call) + } + } + + import DispatchWithCallContext + + /** + * Holds if `p` can flow to the pre-update node associated with post-update + * node `n`, in the same callable, using only value-preserving steps. 
+ */ + private predicate parameterValueFlowsToPreUpdate(ParamNode p, PostUpdateNode n) { + parameterValueFlow(p, n.getPreUpdateNode(), TReadStepTypesNone()) + } + + private predicate store( + Node node1, Content c, Node node2, DataFlowType contentType, DataFlowType containerType + ) { + storeStep(node1, c, node2) and + read(_, c, _) and + contentType = getNodeDataFlowType(node1) and + containerType = getNodeDataFlowType(node2) + or + exists(Node n1, Node n2 | + n1 = node1.(PostUpdateNode).getPreUpdateNode() and + n2 = node2.(PostUpdateNode).getPreUpdateNode() + | + argumentValueFlowsThrough(n2, TReadStepTypesSome(containerType, c, contentType), n1) + or + read(n2, c, n1) and + contentType = getNodeDataFlowType(n1) and + containerType = getNodeDataFlowType(n2) + ) + } + + cached + predicate read(Node node1, Content c, Node node2) { readStep(node1, c, node2) } + + /** + * Holds if data can flow from `node1` to `node2` via a direct assignment to + * `f`. + * + * This includes reverse steps through reads when the result of the read has + * been stored into, in order to handle cases like `x.f1.f2 = y`. + */ + cached + predicate store(Node node1, TypedContent tc, Node node2, DataFlowType contentType) { + store(node1, tc.getContent(), node2, contentType, tc.getContainerType()) + } + + /** + * Holds if data can flow from `fromNode` to `toNode` because they are the post-update + * nodes of some function output and input respectively, where the output and input + * are aliases. A typical example is a function returning `this`, implementing a fluent + * interface. + */ + private predicate reverseStepThroughInputOutputAlias( + PostUpdateNode fromNode, PostUpdateNode toNode + ) { + exists(Node fromPre, Node toPre | + fromPre = fromNode.getPreUpdateNode() and + toPre = toNode.getPreUpdateNode() + | + exists(DataFlowCall c | + // Does the language-specific simpleLocalFlowStep already model flow + // from function input to output? 
+ fromPre = getAnOutNode(c, _) and + toPre.(ArgNode).argumentOf(c, _) and + simpleLocalFlowStep(toPre.(ArgNode), fromPre) + ) + or + argumentValueFlowsThrough(toPre, TReadStepTypesNone(), fromPre) + ) + } + + cached + predicate simpleLocalFlowStepExt(Node node1, Node node2) { + simpleLocalFlowStep(node1, node2) or + reverseStepThroughInputOutputAlias(node1, node2) + } + + /** + * Holds if the call context `call` either improves virtual dispatch in + * `callable` or if it allows us to prune unreachable nodes in `callable`. + */ + cached + predicate recordDataFlowCallSite(DataFlowCall call, DataFlowCallable callable) { + reducedViableImplInCallContext(_, callable, call) + or + exists(Node n | getNodeEnclosingCallable(n) = callable | isUnreachableInCallCached(n, call)) + } + + cached + newtype TCallContext = + TAnyCallContext() or + TSpecificCall(DataFlowCall call) { recordDataFlowCallSite(call, _) } or + TSomeCall() or + TReturn(DataFlowCallable c, DataFlowCall call) { reducedViableImplInReturn(c, call) } + + cached + newtype TReturnPosition = + TReturnPosition0(DataFlowCallable c, ReturnKindExt kind) { + exists(ReturnNodeExt ret | + c = returnNodeGetEnclosingCallable(ret) and + kind = ret.getKind() + ) + } + + cached + newtype TLocalFlowCallContext = + TAnyLocalCall() or + TSpecificLocalCall(DataFlowCall call) { isUnreachableInCallCached(_, call) } + + cached + newtype TReturnKindExt = + TValueReturn(ReturnKind kind) or + TParamUpdate(int pos) { exists(ParamNode p | p.isParameterOf(_, pos)) } + + cached + newtype TBooleanOption = + TBooleanNone() or + TBooleanSome(boolean b) { b = true or b = false } + + cached + newtype TDataFlowCallOption = + TDataFlowCallNone() or + TDataFlowCallSome(DataFlowCall call) + + cached + newtype TTypedContent = MkTypedContent(Content c, DataFlowType t) { store(_, c, _, _, t) } + + cached + newtype TAccessPathFront = + TFrontNil(DataFlowType t) or + TFrontHead(TypedContent tc) + + cached + newtype TAccessPathFrontOption = + 
TAccessPathFrontNone() or + TAccessPathFrontSome(AccessPathFront apf) +} + +/** + * A `Node` at which a cast can occur such that the type should be checked. + */ +class CastingNode extends Node { + CastingNode() { castingNode(this) } +} + +private predicate readStepWithTypes( + Node n1, DataFlowType container, Content c, Node n2, DataFlowType content +) { + read(n1, c, n2) and + container = getNodeDataFlowType(n1) and + content = getNodeDataFlowType(n2) +} + +private newtype TReadStepTypesOption = + TReadStepTypesNone() or + TReadStepTypesSome(DataFlowType container, Content c, DataFlowType content) { + readStepWithTypes(_, container, c, _, content) + } + +private class ReadStepTypesOption extends TReadStepTypesOption { + predicate isSome() { this instanceof TReadStepTypesSome } + + DataFlowType getContainerType() { this = TReadStepTypesSome(result, _, _) } + + Content getContent() { this = TReadStepTypesSome(_, result, _) } + + DataFlowType getContentType() { this = TReadStepTypesSome(_, _, result) } + + string toString() { if this.isSome() then result = "Some(..)" else result = "None()" } +} + +/** + * A call context to restrict the targets of virtual dispatch, prune local flow, + * and match the call sites of flow into a method with flow out of a method. + * + * There are four cases: + * - `TAnyCallContext()` : No restrictions on method flow. + * - `TSpecificCall(DataFlowCall call)` : Flow entered through the + * given `call`. This call improves the set of viable + * dispatch targets for at least one method call in the current callable + * or helps prune unreachable nodes in the current callable. + * - `TSomeCall()` : Flow entered through a parameter. The + * originating call does not improve the set of dispatch targets for any + * method call in the current callable and was therefore not recorded. 
+ * - `TReturn(Callable c, DataFlowCall call)` : Flow reached `call` from `c` and + * this dispatch target of `call` implies a reduced set of dispatch origins + * to which data may flow if it should reach a `return` statement. + */ +abstract class CallContext extends TCallContext { + abstract string toString(); + + /** Holds if this call context is relevant for `callable`. */ + abstract predicate relevantFor(DataFlowCallable callable); +} + +abstract class CallContextNoCall extends CallContext { } + +class CallContextAny extends CallContextNoCall, TAnyCallContext { + override string toString() { result = "CcAny" } + + override predicate relevantFor(DataFlowCallable callable) { any() } +} + +abstract class CallContextCall extends CallContext { + /** Holds if this call context may be `call`. */ + bindingset[call] + abstract predicate matchesCall(DataFlowCall call); +} + +class CallContextSpecificCall extends CallContextCall, TSpecificCall { + override string toString() { + exists(DataFlowCall call | this = TSpecificCall(call) | result = "CcCall(" + call + ")") + } + + override predicate relevantFor(DataFlowCallable callable) { + recordDataFlowCallSite(getCall(), callable) + } + + override predicate matchesCall(DataFlowCall call) { call = this.getCall() } + + DataFlowCall getCall() { this = TSpecificCall(result) } +} + +class CallContextSomeCall extends CallContextCall, TSomeCall { + override string toString() { result = "CcSomeCall" } + + override predicate relevantFor(DataFlowCallable callable) { + exists(ParamNode p | getNodeEnclosingCallable(p) = callable) + } + + override predicate matchesCall(DataFlowCall call) { any() } +} + +class CallContextReturn extends CallContextNoCall, TReturn { + override string toString() { + exists(DataFlowCall call | this = TReturn(_, call) | result = "CcReturn(" + call + ")") + } + + override predicate relevantFor(DataFlowCallable callable) { + exists(DataFlowCall call | this = TReturn(_, call) and callEnclosingCallable(call, 
callable)) + } +} + +/** + * A call context that is relevant for pruning local flow. + */ +abstract class LocalCallContext extends TLocalFlowCallContext { + abstract string toString(); + + /** Holds if this call context is relevant for `callable`. */ + abstract predicate relevantFor(DataFlowCallable callable); +} + +class LocalCallContextAny extends LocalCallContext, TAnyLocalCall { + override string toString() { result = "LocalCcAny" } + + override predicate relevantFor(DataFlowCallable callable) { any() } +} + +class LocalCallContextSpecificCall extends LocalCallContext, TSpecificLocalCall { + LocalCallContextSpecificCall() { this = TSpecificLocalCall(call) } + + DataFlowCall call; + + DataFlowCall getCall() { result = call } + + override string toString() { result = "LocalCcCall(" + call + ")" } + + override predicate relevantFor(DataFlowCallable callable) { relevantLocalCCtx(call, callable) } +} + +private predicate relevantLocalCCtx(DataFlowCall call, DataFlowCallable callable) { + exists(Node n | getNodeEnclosingCallable(n) = callable and isUnreachableInCallCached(n, call)) +} + +/** + * Gets the local call context given the call context and the callable that + * the contexts apply to. + */ +LocalCallContext getLocalCallContext(CallContext ctx, DataFlowCallable callable) { + ctx.relevantFor(callable) and + if relevantLocalCCtx(ctx.(CallContextSpecificCall).getCall(), callable) + then result.(LocalCallContextSpecificCall).getCall() = ctx.(CallContextSpecificCall).getCall() + else result instanceof LocalCallContextAny +} + +/** + * The value of a parameter at function entry, viewed as a node in a data + * flow graph. + */ +class ParamNode extends Node { + ParamNode() { parameterNode(this, _, _) } + + /** + * Holds if this node is the parameter of callable `c` at the specified + * (zero-based) position. + */ + predicate isParameterOf(DataFlowCallable c, int i) { parameterNode(this, c, i) } +} + +/** A data-flow node that represents a call argument. 
*/ +class ArgNode extends Node { + ArgNode() { argumentNode(this, _, _) } + + /** Holds if this argument occurs at the given position in the given call. */ + final predicate argumentOf(DataFlowCall call, int pos) { argumentNode(this, call, pos) } +} + +/** + * A node from which flow can return to the caller. This is either a regular + * `ReturnNode` or a `PostUpdateNode` corresponding to the value of a parameter. + */ +class ReturnNodeExt extends Node { + ReturnNodeExt() { returnNodeExt(this, _) } + + /** Gets the kind of this returned value. */ + ReturnKindExt getKind() { returnNodeExt(this, result) } +} + +/** + * A node to which data can flow from a call. Either an ordinary out node + * or a post-update node associated with a call argument. + */ +class OutNodeExt extends Node { + OutNodeExt() { outNodeExt(this) } +} + +/** + * An extended return kind. A return kind describes how data can be returned + * from a callable. This can either be through a returned value or an updated + * parameter. + */ +abstract class ReturnKindExt extends TReturnKindExt { + /** Gets a textual representation of this return kind. */ + abstract string toString(); + + /** Gets a node corresponding to data flow out of `call`. */ + final OutNodeExt getAnOutNode(DataFlowCall call) { result = getAnOutNodeExt(call, this) } +} + +class ValueReturnKind extends ReturnKindExt, TValueReturn { + private ReturnKind kind; + + ValueReturnKind() { this = TValueReturn(kind) } + + ReturnKind getKind() { result = kind } + + override string toString() { result = kind.toString() } +} + +class ParamUpdateReturnKind extends ReturnKindExt, TParamUpdate { + private int pos; + + ParamUpdateReturnKind() { this = TParamUpdate(pos) } + + int getPosition() { result = pos } + + override string toString() { result = "param update " + pos } +} + +/** A callable tagged with a relevant return kind. 
*/ +class ReturnPosition extends TReturnPosition0 { + private DataFlowCallable c; + private ReturnKindExt kind; + + ReturnPosition() { this = TReturnPosition0(c, kind) } + + /** Gets the callable. */ + DataFlowCallable getCallable() { result = c } + + /** Gets the return kind. */ + ReturnKindExt getKind() { result = kind } + + /** Gets a textual representation of this return position. */ + string toString() { result = "[" + kind + "] " + c } +} + +/** + * Gets the enclosing callable of `n`. Unlike `n.getEnclosingCallable()`, this + * predicate ensures that joins go from `n` to the result instead of the other + * way around. + */ +pragma[inline] +DataFlowCallable getNodeEnclosingCallable(Node n) { + nodeEnclosingCallable(pragma[only_bind_out](n), pragma[only_bind_into](result)) +} + +/** Gets the type of `n` used for type pruning. */ +pragma[inline] +DataFlowType getNodeDataFlowType(Node n) { + nodeDataFlowType(pragma[only_bind_out](n), pragma[only_bind_into](result)) +} + +pragma[noinline] +private DataFlowCallable returnNodeGetEnclosingCallable(ReturnNodeExt ret) { + result = getNodeEnclosingCallable(ret) +} + +pragma[noinline] +private ReturnPosition getReturnPosition0(ReturnNodeExt ret, ReturnKindExt kind) { + result.getCallable() = returnNodeGetEnclosingCallable(ret) and + kind = result.getKind() +} + +pragma[noinline] +ReturnPosition getReturnPosition(ReturnNodeExt ret) { + result = getReturnPosition0(ret, ret.getKind()) +} + +bindingset[cc, callable] +predicate resolveReturn(CallContext cc, DataFlowCallable callable, DataFlowCall call) { + cc instanceof CallContextAny and callable = viableCallableExt(call) + or + exists(DataFlowCallable c0, DataFlowCall call0 | + callEnclosingCallable(call0, callable) and + cc = TReturn(c0, call0) and + c0 = prunedViableImplInCallContextReverse(call0, call) + ) +} + +bindingset[call, cc] +DataFlowCallable resolveCall(DataFlowCall call, CallContext cc) { + exists(DataFlowCall ctx | cc = TSpecificCall(ctx) | + if 
reducedViableImplInCallContext(call, _, ctx) + then result = prunedViableImplInCallContext(call, ctx) + else result = viableCallableExt(call) + ) + or + result = viableCallableExt(call) and cc instanceof CallContextSomeCall + or + result = viableCallableExt(call) and cc instanceof CallContextAny + or + result = viableCallableExt(call) and cc instanceof CallContextReturn +} + +/** An optional Boolean value. */ +class BooleanOption extends TBooleanOption { + string toString() { + this = TBooleanNone() and result = "" + or + this = TBooleanSome(any(boolean b | result = b.toString())) + } +} + +/** An optional `DataFlowCall`. */ +class DataFlowCallOption extends TDataFlowCallOption { + string toString() { + this = TDataFlowCallNone() and + result = "(none)" + or + exists(DataFlowCall call | + this = TDataFlowCallSome(call) and + result = call.toString() + ) + } +} + +/** Content tagged with the type of a containing object. */ +class TypedContent extends MkTypedContent { + private Content c; + private DataFlowType t; + + TypedContent() { this = MkTypedContent(c, t) } + + /** Gets the content. */ + Content getContent() { result = c } + + /** Gets the container type. */ + DataFlowType getContainerType() { result = t } + + /** Gets a textual representation of this content. */ + string toString() { result = c.toString() } +} + +/** + * The front of an access path. This is either a head or a nil. 
+ */ +abstract class AccessPathFront extends TAccessPathFront { + abstract string toString(); + + abstract DataFlowType getType(); + + abstract boolean toBoolNonEmpty(); + + TypedContent getHead() { this = TFrontHead(result) } + + predicate isClearedAt(Node n) { clearsContentCached(n, getHead().getContent()) } +} + +class AccessPathFrontNil extends AccessPathFront, TFrontNil { + private DataFlowType t; + + AccessPathFrontNil() { this = TFrontNil(t) } + + override string toString() { result = ppReprType(t) } + + override DataFlowType getType() { result = t } + + override boolean toBoolNonEmpty() { result = false } +} + +class AccessPathFrontHead extends AccessPathFront, TFrontHead { + private TypedContent tc; + + AccessPathFrontHead() { this = TFrontHead(tc) } + + override string toString() { result = tc.toString() } + + override DataFlowType getType() { result = tc.getContainerType() } + + override boolean toBoolNonEmpty() { result = true } +} + +/** An optional access path front. */ +class AccessPathFrontOption extends TAccessPathFrontOption { + string toString() { + this = TAccessPathFrontNone() and result = "" + or + this = TAccessPathFrontSome(any(AccessPathFront apf | result = apf.toString())) + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/DataFlowImplSpecific.qll b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/DataFlowImplSpecific.qll new file mode 100644 index 00000000000..ee044c5e426 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/DataFlowImplSpecific.qll @@ -0,0 +1,11 @@ +/** + * Provides Go-specific definitions for use in the data flow library. 
+ */ +module Private { + import DataFlowPrivate + import DataFlowDispatch +} + +module Public { + import DataFlowUtil +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll new file mode 100644 index 00000000000..c2093b2af14 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll @@ -0,0 +1,277 @@ +private import go +private import DataFlowUtil +private import DataFlowImplCommon + +private newtype TReturnKind = + MkReturnKind(int i) { exists(SignatureType st | exists(st.getResultType(i))) } + +/** + * A return kind. A return kind describes how a value can be returned + * from a callable. For Go, this is either a return of a single value + * or of one of multiple values. + */ +class ReturnKind extends TReturnKind { + /** Gets a textual representation of this return kind. */ + string toString() { exists(int i | this = MkReturnKind(i) | result = "return[" + i + "]") } +} + +/** A data flow node that represents returning a value from a function. */ +class ReturnNode extends ResultNode { + ReturnKind kind; + + ReturnNode() { kind = MkReturnKind(i) } + + /** Gets the kind of this returned value. */ + ReturnKind getKind() { result = kind } +} + +/** A data flow node that represents the output of a call. */ +class OutNode extends DataFlow::Node { + DataFlow::CallNode call; + int i; + + OutNode() { this = call.getResult(i) } + + /** Gets the underlying call. */ + DataFlowCall getCall() { result = call.asExpr() } +} + +/** + * Gets a node that can read the value returned from `call` with return kind + * `kind`. + */ +OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) { + exists(DataFlow::CallNode c, int i | c.asExpr() = call and kind = MkReturnKind(i) | + result = c.getResult(i) + ) +} + +/** + * Holds if data can flow from `n1` to `n2` in a way that loses the + * calling context.
For example, this would happen with flow through a + * global or static variable. + */ +predicate jumpStep(Node n1, Node n2) { + exists(ValueEntity v, Write w | + not v instanceof SsaSourceVariable and + not v instanceof Field and + w.writes(v, n1) and + n2 = v.getARead() + ) +} + +private newtype TContent = + TFieldContent(Field f) or + TCollectionContent() or + TArrayContent() or + TPointerContent(PointerType p) + +/** + * A reference contained in an object. Examples include instance fields, the + * contents of a collection object, the contents of an array or pointer. + */ +class Content extends TContent { + /** Gets a textual representation of this element. */ + abstract string toString(); + + predicate hasLocationInfo(string path, int sl, int sc, int el, int ec) { + path = "" and sl = 0 and sc = 0 and el = 0 and ec = 0 + } +} + +private class FieldContent extends Content, TFieldContent { + Field f; + + FieldContent() { this = TFieldContent(f) } + + override string toString() { result = f.toString() } + + override predicate hasLocationInfo(string path, int sl, int sc, int el, int ec) { + f.getDeclaration().hasLocationInfo(path, sl, sc, el, ec) + } +} + +private class CollectionContent extends Content, TCollectionContent { + override string toString() { result = "collection" } +} + +private class ArrayContent extends Content, TArrayContent { + override string toString() { result = "array" } +} + +private class PointerContent extends Content, TPointerContent { + override string toString() { result = "pointer" } +} + +/** + * Holds if data can flow from `node1` to `node2` via an assignment to `c`. + * Thus, `node2` references an object with content `c` that contains the + * value of `node1`.
+ */ +predicate storeStep(Node node1, Content c, PostUpdateNode node2) { + // a write `(*p).f = rhs` is modelled as two store steps: `rhs` flows into field `f` of `(*p)`, + // which in turn flows into the pointer content of `p` + exists(Write w, Field f, DataFlow::Node base, DataFlow::Node rhs | w.writesField(base, f, rhs) | + node1 = rhs and + node2.getPreUpdateNode() = base and + c = TFieldContent(f) + or + node1 = base and + node2.getPreUpdateNode() = node1.(PointerDereferenceNode).getOperand() and + c = TPointerContent(node2.getType()) + ) + or + node1 = node2.(AddressOperationNode).getOperand() and + c = TPointerContent(node2.getType()) +} + +/** + * Holds if data can flow from `node1` to `node2` via a read of `f`. + * Thus, `node1` references an object with a field `f` whose value ends up in + * `node2`. + */ +predicate readStep(Node node1, Content f, Node node2) { + node1 = node2.(PointerDereferenceNode).getOperand() and + f = TPointerContent(node1.getType()) + or + exists(FieldReadNode read | + node2 = read and + node1 = read.getBase() and + f = TFieldContent(read.getField()) + ) +} + +/** + * Holds if values stored inside content `c` are cleared at node `n`. + */ +predicate clearsContent(Node n, Content c) { + none() // stub implementation +} + +/** Gets the type of `n` used for type pruning. */ +DataFlowType getNodeType(Node n) { result = n.getType() } + +/** Gets a string representation of a type returned by `getNodeType()`. */ +string ppReprType(Type t) { result = t.toString() } + +/** + * Holds if `t1` and `t2` are compatible, that is, whether data can flow from + * a node of type `t1` to a node of type `t2`. + */ +pragma[inline] +predicate compatibleTypes(Type t1, Type t2) { + any() // stub implementation +} + +////////////////////////////////////////////////////////////////////////////// +// Java QL library compatibility wrappers +////////////////////////////////////////////////////////////////////////////// +/** A node that performs a type cast.
*/ +class CastNode extends ExprNode { + override ConversionExpr expr; +} + +class DataFlowCallable = FuncDef; + +class DataFlowExpr = Expr; + +class DataFlowType = Type; + +class DataFlowLocation = Location; + +/** A function call relevant for data flow. */ +class DataFlowCall extends Expr { + DataFlow::CallNode call; + + DataFlowCall() { this = call.asExpr() } + + /** + * Gets the nth argument for this call. + */ + Node getArgument(int n) { result = call.getArgument(n) } + + /** Gets the data flow node corresponding to this call. */ + ExprNode getNode() { result = call } + + /** Gets the enclosing callable of this call. */ + DataFlowCallable getEnclosingCallable() { result = this.getEnclosingFunction() } +} + +/** Holds if `e` is an expression that always has the same Boolean value `val`. */ +private predicate constantBooleanExpr(Expr e, boolean val) { + e.getBoolValue() = val + or + exists(SsaExplicitDefinition v, Expr src | + IR::evalExprInstruction(e) = v.getVariable().getAUse() and + IR::evalExprInstruction(src) = v.getRhs() and + constantBooleanExpr(src, val) + ) +} + +/** An argument that always has the same Boolean value. */ +private class ConstantBooleanArgumentNode extends ArgumentNode, ExprNode { + ConstantBooleanArgumentNode() { constantBooleanExpr(this.getExpr(), _) } + + /** Gets the Boolean value of this expression. */ + boolean getBooleanValue() { constantBooleanExpr(this.getExpr(), result) } +} + +/** + * Returns a guard that will certainly not hold in calling context `call`. + * + * In particular it does not hold because it checks that `param` has value `b`, but + * in context `call` it is known to have value `!b`. Note this is `noinline`d in order + * to avoid a bad join order in `isUnreachableInCall`. 
+ */ +pragma[noinline] +private ControlFlow::ConditionGuardNode getAFalsifiedGuard(DataFlowCall call) { + exists(ParameterNode param, ConstantBooleanArgumentNode arg | + // get constant bool argument and parameter for this call + viableParamArg(call, param, arg) and + // where a use of `param` is checked by the resulting guard against the opposite value of `arg` + result.ensures(param.getAUse(), arg.getBooleanValue().booleanNot()) + ) +} + +/** + * Holds if the node `n` is unreachable when the call context is `call`. + */ +predicate isUnreachableInCall(Node n, DataFlowCall call) { + getAFalsifiedGuard(call).dominates(n.getBasicBlock()) +} + +int accessPathLimit() { result = 5 } + +/** The unit type. */ +private newtype TUnit = TMkUnit() + +/** The trivial type with a single element. */ +class Unit extends TUnit { + /** Gets a textual representation of this element. */ + string toString() { result = "unit" } +} + +/** + * Gets the `i`th argument of call `c`, where the receiver of a method call + * counts as argument -1. + */ +Node getArgument(CallNode c, int i) { + result = c.getArgument(i) + or + result = c.(MethodCallNode).getReceiver() and + i = -1 +} + +/** Holds if `n` should be hidden from path explanations. */ +predicate nodeIsHidden(Node n) { none() } + +class LambdaCallKind = Unit; + +/** Holds if `creation` is an expression that creates a lambda of kind `kind` for `c`. */ +predicate lambdaCreation(Node creation, LambdaCallKind kind, DataFlowCallable c) { none() } + +/** Holds if `call` is a lambda call of kind `kind` where `receiver` is the lambda expression. */ +predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) { none() } + +/** Extra data-flow steps needed for lambda flow analysis.
*/ +predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() } diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll new file mode 100644 index 00000000000..0f764173167 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/DataFlowUtil.qll @@ -0,0 +1,1340 @@ +/** + * Provides Go-specific definitions for use in the data flow library. + */ + +import go +import semmle.go.dataflow.FunctionInputsAndOutputs +private import DataFlowPrivate + +cached +private newtype TNode = + MkInstructionNode(IR::Instruction insn) or + MkSsaNode(SsaDefinition ssa) or + MkGlobalFunctionNode(Function f) + +/** + * A node in a data flow graph. + * + * A node can be an IR instruction, an SSA definition, or a function + * (the `MkGlobalFunctionNode` branch of `TNode`). + * Nodes of the first two kinds are created with `DataFlow::instructionNode` + * and `DataFlow::ssaNode` respectively. + */ +class Node extends TNode { + /** Gets the function to which this node belongs. */ + ControlFlow::Root getRoot() { none() } // overridden in subclasses + + /** INTERNAL: Use `getRoot()` instead. */ + FuncDef getEnclosingCallable() { result = getRoot() } + + /** Gets the type of this node. */ + Type getType() { none() } // overridden in subclasses + + /** Gets the expression corresponding to this node, if any. */ + Expr asExpr() { none() } // overridden in subclasses + + /** Gets the parameter corresponding to this node, if any. */ + Parameter asParameter() { none() } // overridden in subclasses + + /** Gets the IR instruction corresponding to this node, if any. */ + IR::Instruction asInstruction() { none() } // overridden in subclasses + + /** Gets a textual representation of the kind of this data-flow node. */ + string getNodeKind() { none() } // overridden in subclasses + + /** Gets the basic block to which this data-flow node belongs, if any.
*/ + BasicBlock getBasicBlock() { result = asInstruction().getBasicBlock() } + + /** Gets a textual representation of this element. */ + string toString() { result = "data-flow node" } // overridden in subclasses + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + filepath = "" and + startline = 0 and + startcolumn = 0 and + endline = 0 and + endcolumn = 0 + } + + /** Gets the file in which this node appears. */ + File getFile() { hasLocationInfo(result.getAbsolutePath(), _, _, _, _) } + + /** Gets the start line of the location of this node. */ + int getStartLine() { hasLocationInfo(_, result, _, _, _) } + + /** Gets the start column of the location of this node. */ + int getStartColumn() { hasLocationInfo(_, _, result, _, _) } + + /** Gets the end line of the location of this node. */ + int getEndLine() { hasLocationInfo(_, _, _, result, _) } + + /** Gets the end column of the location of this node. */ + int getEndColumn() { hasLocationInfo(_, _, _, _, result) } + + /** + * Gets an upper bound on the type of this node. + */ + Type getTypeBound() { result = getType() } + + /** Gets the floating-point value this data-flow node contains, if any. */ + float getFloatValue() { result = this.asExpr().getFloatValue() } + + /** + * Gets the integer value this data-flow node contains, if any. + * + * Note that this does not have a result if the value is too large to fit in a + * 32-bit signed integer type. + */ + int getIntValue() { result = this.asInstruction().getIntValue() } + + /** Gets either `getFloatValue` or `getIntValue`. 
*/ + float getNumericValue() { result = this.asInstruction().getNumericValue() } + + /** + * Holds if the complex value this data-flow node contains has real part `real` and imaginary + * part `imag`. + */ + predicate hasComplexValue(float real, float imag) { + this.asInstruction().hasComplexValue(real, imag) + } + + /** Gets the string value this data-flow node contains, if any. */ + string getStringValue() { result = this.asInstruction().getStringValue() } + + /** + * Gets the string representation of the exact value this data-flow node + * contains, if any. + * + * For example, for the constant 3.141592653589793238462, this will + * result in 1570796326794896619231/500000000000000000000 + */ + string getExactValue() { result = this.asInstruction().getExactValue() } + + /** Gets the Boolean value this data-flow node contains, if any. */ + boolean getBoolValue() { result = this.asInstruction().getBoolValue() } + + /** Holds if the value of this data-flow node is known at compile time. */ + predicate isConst() { this.asInstruction().isConst() } + + /** + * Holds if the result of this instruction is known at compile time, and is guaranteed not to + * depend on the platform where it is evaluated. + */ + predicate isPlatformIndependentConstant() { this.asInstruction().isPlatformIndependentConstant() } + + /** + * Gets a data-flow node to which data may flow from this node in one (intra-procedural) step. + */ + Node getASuccessor() { localFlowStep(this, result) } + + /** + * Gets a data-flow node from which data may flow to this node in one (intra-procedural) step. + */ + Node getAPredecessor() { this = result.getASuccessor() } +} + +/** + * An IR instruction, viewed as a node in a data flow graph. 
+ */ +class InstructionNode extends Node, MkInstructionNode { + IR::Instruction insn; + + InstructionNode() { this = MkInstructionNode(insn) } + + override IR::Instruction asInstruction() { result = insn } + + override ControlFlow::Root getRoot() { result = insn.getRoot() } + + override Type getType() { result = insn.getResultType() } + + override string getNodeKind() { result = insn.getInsnKind() } + + override string toString() { result = insn.toString() } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + insn.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** + * An expression, viewed as a node in a data flow graph. + */ +class ExprNode extends InstructionNode { + override IR::EvalInstruction insn; + Expr expr; + + ExprNode() { expr = insn.getExpr() } + + override Expr asExpr() { result = expr } + + /** Gets the underlying expression this node corresponds to. */ + Expr getExpr() { result = expr } +} + +/** + * An SSA variable, viewed as a node in a data flow graph. + */ +class SsaNode extends Node, MkSsaNode { + SsaDefinition ssa; + + SsaNode() { this = MkSsaNode(ssa) } + + /** Gets the node whose value is stored in this SSA variable, if any. */ + Node getInit() { result = instructionNode(ssa.(SsaExplicitDefinition).getRhs()) } + + /** Gets a use of this SSA variable. */ + InstructionNode getAUse() { result = instructionNode(ssa.getVariable().getAUse()) } + + /** Gets the program variable corresponding to this SSA variable. */ + SsaSourceVariable getSourceVariable() { result = ssa.getSourceVariable() } + + /** Gets the unique definition of this SSA variable. 
*/ + SsaDefinition getDefinition() { result = ssa } + + override ControlFlow::Root getRoot() { result = ssa.getRoot() } + + override Type getType() { result = ssa.getSourceVariable().getType() } + + override string getNodeKind() { result = "SSA variable " + ssa.getSourceVariable().getName() } + + override string toString() { result = ssa.toString() } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + ssa.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +private module FunctionNode { + /** A function, viewed as a node in a data flow graph. */ + abstract class Range extends Node { + /** Gets the `i`th parameter of this function. */ + abstract ParameterNode getParameter(int i); + + /** Gets the name of this function, if it has one. */ + abstract string getName(); + + /** + * Gets the dataflow node holding the value of the receiver, if any. + */ + abstract ReceiverNode getReceiver(); + + /** + * Gets a value returned by the given function via a return statement or an assignment to a + * result variable. + */ + abstract ResultNode getAResult(); + + /** + * Gets the function entity this node corresponds to. + * + * Note that this predicate has no result for function literals. + */ + Function getFunction() { none() } + } +} + +/** A function, viewed as a node in a data flow graph. */ +class FunctionNode extends Node { + FunctionNode::Range self; + + FunctionNode() { this = self } + + /** Gets the `i`th parameter of this function. */ + ParameterNode getParameter(int i) { result = self.getParameter(i) } + + /** Gets a parameter of this function. */ + ParameterNode getAParameter() { result = this.getParameter(_) } + + /** Gets the number of parameters declared on this function. */ + int getNumParameter() { result = count(this.getAParameter()) } + + /** Gets the name of this function, if it has one. 
*/ + string getName() { result = self.getName() } + + /** + * Gets the dataflow node holding the value of the receiver, if any. + */ + ReceiverNode getReceiver() { result = self.getReceiver() } + + /** + * Gets a value returned by the given function via a return statement or an assignment to a + * result variable. + */ + ResultNode getAResult() { result = self.getAResult() } + + /** + * Gets the data-flow node corresponding to the `i`th result of this function. + */ + ResultNode getResult(int i) { result = getAResult() and result.getIndex() = i } + + /** + * Gets the function entity this node corresponds to. + * + * Note that this predicate has no result for function literals. + */ + Function getFunction() { result = self.getFunction() } +} + +/** A representation of a function that is declared in the module scope. */ +class GlobalFunctionNode extends FunctionNode::Range, MkGlobalFunctionNode { + Function func; + + GlobalFunctionNode() { this = MkGlobalFunctionNode(func) } + + override ParameterNode getParameter(int i) { result = parameterNode(func.getParameter(i)) } + + override string getName() { result = func.getName() } + + override Function getFunction() { result = func } + + override ReceiverNode getReceiver() { result = receiverNode(func.(Method).getReceiver()) } + + override string getNodeKind() { result = "function " + func.getName() } + + override string toString() { result = "function " + func.getName() } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + func.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + + override ResultNode getAResult() { + result.getRoot() = getFunction().(DeclaredFunction).getFuncDecl() + } +} + +/** A representation of the function that is defined by a function literal. 
*/ +class FuncLitNode extends FunctionNode::Range, ExprNode { + override FuncLit expr; + + override ParameterNode getParameter(int i) { result = parameterNode(expr.getParameter(i)) } + + override string getName() { none() } + + override ReceiverNode getReceiver() { none() } + + override string toString() { result = "function literal" } + + override ResultNode getAResult() { result.getRoot() = getExpr() } +} + +/** + * Gets a possible source of the callee of call `cn`: the callee node itself or a node + * that reaches it through one or more `getASuccessor()` steps. + * + * This is written explicitly like this instead of using `getCalleeNode().getAPredecessor*()` + * or `result.getASuccessor*() = cn.getCalleeNode()` because the explicit form inhibits the + * optimizer from combining this with other uses of `getASuccessor*()`, which can lead to + * recursion through a magic side-condition if those other users call `getACallee()` and thus + * pointless recomputation of `getACallee()` each recursive iteration. + */ +private DataFlow::Node getACalleeSource(DataFlow::CallNode cn) { + result = cn.getCalleeNode() or + result.getASuccessor() = getACalleeSource(cn) +} + +/** A data flow node that represents a call. */ +class CallNode extends ExprNode { + override CallExpr expr; + + /** Gets the declared target of this call. */ + Function getTarget() { result = expr.getTarget() } + + private DataFlow::Node getACalleeSource() { result = getACalleeSource(this) } + + /** + * Gets the definition of a possible target of this call. + * + * For non-virtual calls, there is at most one possible call target (but there may be none if the + * target has no declaration). + * + * For virtual calls, we look up possible targets in all types that implement the receiver + * interface type.
+ */ + FuncDef getACallee() { + result = getTarget().(DeclaredFunction).getFuncDecl() + or + exists(DataFlow::Node calleeSource | calleeSource = getACalleeSource() | + result = calleeSource.asExpr() + or + exists(Method declared, Method actual | + calleeSource = declared.getARead() and + actual.implements(declared) and + result = actual.(DeclaredFunction).getFuncDecl() + ) + ) + } + + /** Gets the name of the function or method being called, if it can be determined. */ + string getCalleeName() { result = expr.getTarget().getName() or result = expr.getCalleeName() } + + /** Gets the data flow node specifying the function to be called. */ + Node getCalleeNode() { result = exprNode(expr.getCalleeExpr()) } + + /** Gets the underlying call. */ + CallExpr getCall() { result = this.getExpr() } + + /** + * Gets the data flow node corresponding to the `i`th argument of this call. + * + * Note that the first argument in calls to the built-in function `make` is a type, which is + * not a data-flow node. It is skipped for the purposes of this predicate, so the (syntactically) + * second argument becomes the first argument in terms of data flow. + * + * For calls of the form `f(g())` where `g` has multiple results, the arguments of the call to + * `f` are the (implicit) element extraction nodes for the call to `g`. + */ + Node getArgument(int i) { + if expr.getArgument(0).getType() instanceof TupleType + then result = extractTupleElement(exprNode(expr.getArgument(0)), i) + else + result = rank[i + 1](Expr arg, int j | arg = expr.getArgument(j) | exprNode(arg) order by j) + } + + /** Gets the data flow node corresponding to an argument of this call. */ + Node getAnArgument() { result = this.getArgument(_) } + + /** Gets the number of arguments of this call, if it can be determined. */ + int getNumArgument() { result = count(this.getAnArgument()) } + + /** Gets a function passed as the `i`th argument of this call.
*/ + FunctionNode getCallback(int i) { result.getASuccessor*() = this.getArgument(i) } + + /** + * Gets the data-flow node corresponding to the `i`th result of this call. + * + * If there is a single result then it is considered to be the 0th result. + */ + Node getResult(int i) { + i = 0 and result = getResult() + or + result = extractTupleElement(this, i) + } + + /** + * Gets the data-flow node corresponding to the result of this call. + * + * Note that this predicate is not defined for calls with multiple results; use the one-argument + * variant `getResult(i)` for such calls. + */ + Node getResult() { not getType() instanceof TupleType and result = this } + + /** Gets a result of this call. */ + Node getAResult() { result = this.getResult(_) } + + /** Gets the data flow node corresponding to the receiver of this call, if any. */ + Node getReceiver() { result = getACalleeSource().(MethodReadNode).getReceiver() } + + /** Holds if this call has an ellipsis after its last argument. */ + predicate hasEllipsis() { expr.hasEllipsis() } +} + +/** A data flow node that represents a call to a method. */ +class MethodCallNode extends CallNode { + MethodCallNode() { expr.getTarget() instanceof Method } + + override Method getTarget() { result = expr.getTarget() } + + override MethodDecl getACallee() { result = super.getACallee() } +} + +/** A representation of a parameter initialization. */ +class ParameterNode extends SsaNode { + override SsaExplicitDefinition ssa; + Parameter parm; + + ParameterNode() { ssa.getInstruction() = IR::initParamInstruction(parm) } + + /** Gets the parameter this node initializes. */ + override Parameter asParameter() { result = parm } + + /** Holds if this node initializes the `i`th parameter of `fd`. */ + predicate isParameterOf(FuncDef fd, int i) { parm.isParameterOf(fd, i) } +} + +/** A representation of a receiver initialization. 
*/ +class ReceiverNode extends ParameterNode { + override ReceiverVariable parm; + + /** Gets the receiver variable this node initializes. */ + ReceiverVariable asReceiverVariable() { result = parm } + + /** Holds if this node initializes the receiver variable of `m`. */ + predicate isReceiverOf(MethodDecl m) { parm.isReceiverOf(m) } +} + +private Node getADirectlyWrittenNode() { + exists(Write w | w.writesField(result, _, _) or w.writesElement(result, _, _)) +} + +private DataFlow::Node getAccessPathPredecessor(DataFlow::Node node) { + result = node.(PointerDereferenceNode).getOperand() + or + result = node.(ComponentReadNode).getBase() +} + +private Node getAWrittenNode() { result = getAccessPathPredecessor*(getADirectlyWrittenNode()) } + +/** + * A node associated with an object after an operation that might have + * changed its state. + * + * This can be either the argument to a callable after the callable returns + * (which might have mutated the argument), or the qualifier of a field after + * an update to the field. + * + * Nodes corresponding to AST elements, for example `ExprNode`, usually refer + * to the value before the update with the exception of `ClassInstanceExpr`, + * which represents the value after the constructor has run. + */ +class PostUpdateNode extends Node { + Node preupd; + + PostUpdateNode() { + ( + preupd instanceof AddressOperationNode + or + preupd = any(AddressOperationNode addr).getOperand() + or + preupd = any(PointerDereferenceNode deref).getOperand() + or + preupd = getAWrittenNode() + or + preupd instanceof ArgumentNode and + mutableType(preupd.getType()) + ) and + ( + preupd = this.(SsaNode).getAUse() + or + preupd = this and + not basicLocalFlowStep(_, this) + ) + } + + /** + * Gets the node before the state update. + */ + Node getPreUpdateNode() { result = preupd } +} + +/** + * A data-flow node that occurs as an argument in a call, including receiver arguments. 
+ */ +class ArgumentNode extends Node { + CallNode c; + int i; + + ArgumentNode() { this = getArgument(c, i) } + + /** + * Holds if this argument occurs at the given position in the given call. + * + * The receiver argument is considered to have index `-1`. + * + * Note that we currently do not track receiver arguments into calls to interface methods. + */ + predicate argumentOf(CallExpr call, int pos) { + call = c.asExpr() and + pos = i and + ( + i != -1 + or + exists(c.(MethodCallNode).getTarget().getBody()) + ) + } + + /** + * Gets the `CallNode` this is an argument to. + */ + CallNode getCall() { result = c } +} + +/** + * Holds if `tp` is a type that may (directly or indirectly) reference a memory location. + * + * If a value with a mutable type is passed to a function, the function could potentially + * mutate it or something it points to. + */ +predicate mutableType(Type tp) { + exists(Type underlying | underlying = tp.getUnderlyingType() | + not underlying instanceof BoolType and + not underlying instanceof NumericType and + not underlying instanceof StringType and + not underlying instanceof LiteralType + ) +} + +/** + * A node whose value is returned as a result from a function. + * + * This can either be a node corresponding to an expression in a return statement, + * or a node representing the current value of a named result variable at the exit + * of the function. + */ +class ResultNode extends InstructionNode { + FuncDef fd; + int i; + + ResultNode() { + exists(IR::ReturnInstruction ret | ret.getRoot() = fd | insn = ret.getResult(i)) + or + insn.(IR::ReadResultInstruction).reads(fd.getResultVar(i)) + } + + /** Gets the index of this result among all results of the function. */ + int getIndex() { result = i } +} + +/** + * A data-flow node that reads the value of a variable, constant, field or array element, + * or refers to a function. 
+ */ +class ReadNode extends InstructionNode { + override IR::ReadInstruction insn; + + /** + * Holds if this data-flow node evaluates to the value of `v`, which is a value entity, that is, a + * constant, variable, field, function, or method. + */ + predicate reads(ValueEntity v) { insn.reads(v) } + + /** + * Holds if this data-flow node reads the value of SSA variable `v`. + */ + predicate readsSsaVariable(SsaVariable v) { insn = v.getAUse() } + + /** + * Holds if this data-flow node reads the value of field `f` on the value of `base` or its + * implicit dereference. + * + * For example, for the field read `x.width`, `base` is either the data-flow node corresponding + * to `x` or (if `x` is a pointer) the data-flow node corresponding to the implicit dereference + * `*x`, and `f` is the field referenced by `width`. + */ + predicate readsField(Node base, Field f) { + insn.readsField(base.asInstruction(), f) + or + insn.readsField(IR::implicitDerefInstruction(base.asExpr()), f) + } + + /** + * Holds if this data-flow node reads the value of field `package.type.field` on the value of `base` or its + * implicit dereference. + * + * For example, for the field read `x.width`, `base` is either the data-flow node corresponding + * to `x` or (if `x` is a pointer) the data-flow node corresponding to the implicit dereference + * `*x`, and `x` has the type `package.type`. + */ + predicate readsField(Node base, string package, string type, string field) { + exists(Field f | f.hasQualifiedName(package, type, field) | this.readsField(base, f)) + } + + /** + * Holds if this data-flow node looks up method `m` on the value of `receiver` or its implicit + * dereference. + * + * For example, for the method read `x.area`, `receiver` is either the data-flow node corresponding + * to `x` or (if `x` is a pointer) the data-flow node corresponding to the implicit dereference + * `*x`, and `m` is the method referenced by `area`. 
+ */ + predicate readsMethod(Node receiver, Method m) { + insn.readsMethod(receiver.asInstruction(), m) + or + insn.readsMethod(IR::implicitDerefInstruction(receiver.asExpr()), m) + } + + /** + * Holds if this data-flow node looks up method `package.type.name` on the value of `receiver` + * or its implicit dereference. + * + * For example, for the method read `x.name`, `receiver` is either the data-flow node corresponding + * to `x` or (if `x` is a pointer) the data-flow node corresponding to the implicit dereference + * `*x`, and `package.type` is a type of `x` that defines a method named `name`. + */ + predicate readsMethod(Node receiver, string package, string type, string name) { + exists(Method m | m.hasQualifiedName(package, type, name) | this.readsMethod(receiver, m)) + } + + /** + * Holds if this data-flow node reads the value of element `index` on the value of `base` or its + * implicit dereference. + * + * For example, for the element read `xs[i]`, `base` is either the data-flow node corresponding + * to `xs` or (if `xs` is a pointer) the data-flow node corresponding to the implicit dereference + * `*xs`, and `index` is the data-flow node corresponding to `i`. + */ + predicate readsElement(Node base, Node index) { + insn.readsElement(base.asInstruction(), index.asInstruction()) + or + insn.readsElement(IR::implicitDerefInstruction(base.asExpr()), index.asInstruction()) + } +} + +/** + * A data-flow node that reads the value of a field from a struct, or an element from an array, slice, map or string. + */ +class ComponentReadNode extends ReadNode { + override IR::ComponentReadInstruction insn; + + /** Gets the data-flow node representing the base from which the field or element is read. */ + Node getBase() { result = instructionNode(insn.getBase()) } +} + +/** + * A data-flow node that reads an element of an array, map, slice or string. 
+ */ +class ElementReadNode extends ComponentReadNode { + override IR::ElementReadInstruction insn; + + /** Gets the data-flow node representing the index of the element being read. */ + Node getIndex() { result = instructionNode(insn.getIndex()) } + + /** Holds if this data-flow node reads element `index` of `base`. */ + predicate reads(Node base, Node index) { readsElement(base, index) } +} + +/** + * A data-flow node that extracts a substring or slice from a string, array, pointer to array, + * or slice. + */ +class SliceNode extends InstructionNode { + override IR::SliceInstruction insn; + + /** Gets the base of this slice node. */ + Node getBase() { result = instructionNode(insn.getBase()) } + + /** Gets the lower bound of this slice node. */ + Node getLow() { result = instructionNode(insn.getLow()) } + + /** Gets the upper bound of this slice node. */ + Node getHigh() { result = instructionNode(insn.getHigh()) } + + /** Gets the maximum of this slice node. */ + Node getMax() { result = instructionNode(insn.getMax()) } +} + +/** + * A data-flow node corresponding to an expression with a binary operator. 
+ */ +class BinaryOperationNode extends Node { + Node left; + Node right; + string op; + + BinaryOperationNode() { + exists(BinaryExpr bin | bin = asExpr() | + left = exprNode(bin.getLeftOperand()) and + right = exprNode(bin.getRightOperand()) and + op = bin.getOperator() + ) + or + exists(IR::EvalCompoundAssignRhsInstruction rhs, CompoundAssignStmt assgn, string o | + rhs = asInstruction() and assgn = rhs.getAssignment() and o = assgn.getOperator() + | + left = exprNode(assgn.getLhs()) and + right = exprNode(assgn.getRhs()) and + op = o.substring(0, o.length() - 1) + ) + or + exists(IR::EvalIncDecRhsInstruction rhs, IncDecStmt ids | + rhs = asInstruction() and ids = rhs.getStmt() + | + left = exprNode(ids.getOperand()) and + right = instructionNode(any(IR::EvalImplicitOneInstruction one | one.getStmt() = ids)) and + op = ids.getOperator().charAt(0) + ) + } + + /** Holds if this operation may have observable side effects. */ + predicate mayHaveSideEffects() { asExpr().mayHaveOwnSideEffects() } + + /** Gets the left operand of this operation. */ + Node getLeftOperand() { result = left } + + /** Gets the right operand of this operation. */ + Node getRightOperand() { result = right } + + /** Gets an operand of this operation. */ + Node getAnOperand() { result = left or result = right } + + /** Gets the operator of this operation. */ + string getOperator() { result = op } + + /** Holds if `x` and `y` are the operands of this operation, in either order. */ + predicate hasOperands(Node x, Node y) { + x = getAnOperand() and + y = getAnOperand() and + x != y + } +} + +/** + * A data-flow node corresponding to an expression with a unary operator. + */ +class UnaryOperationNode extends InstructionNode { + UnaryOperationNode() { + asExpr() instanceof UnaryExpr + or + asExpr() instanceof StarExpr + or + insn instanceof IR::EvalImplicitDerefInstruction + } + + /** Holds if this operation may have observable side effects. 
*/ + predicate mayHaveSideEffects() { + asExpr().mayHaveOwnSideEffects() + or + insn instanceof IR::EvalImplicitDerefInstruction + } + + /** Gets the operand of this operation. */ + Node getOperand() { + result = exprNode(asExpr().(UnaryExpr).getOperand()) + or + result = exprNode(asExpr().(StarExpr).getBase()) + or + result = exprNode(insn.(IR::EvalImplicitDerefInstruction).getOperand()) + } + + /** Gets the operator of this operation. */ + string getOperator() { + result = asExpr().(UnaryExpr).getOperator() + or + asExpr() instanceof StarExpr and + result = "*" + or + insn instanceof IR::EvalImplicitDerefInstruction and + result = "*" + } +} + +/** + * A data-flow node that dereferences a pointer. + */ +class PointerDereferenceNode extends UnaryOperationNode { + PointerDereferenceNode() { + asExpr() instanceof StarExpr + or + asExpr() instanceof DerefExpr + or + insn instanceof IR::EvalImplicitDerefInstruction + } +} + +/** + * A data-flow node that takes the address of a memory location. + */ +class AddressOperationNode extends UnaryOperationNode, ExprNode { + override AddressExpr expr; +} + +/** + * A data-flow node that reads the value of a field. + */ +class FieldReadNode extends ComponentReadNode { + override IR::FieldReadInstruction insn; + + /** Gets the field this node reads. */ + Field getField() { result = insn.getField() } + + /** Gets the name of the field this node reads. */ + string getFieldName() { result = this.getField().getName() } +} + +/** + * A data-flow node that refers to a method. + */ +class MethodReadNode extends ReadNode { + override IR::MethodReadInstruction insn; + + /** Gets the receiver node on which the method is referenced. */ + Node getReceiver() { result = instructionNode(insn.getReceiver()) } + + /** Gets the method this node refers to. */ + Method getMethod() { result = insn.getMethod() } + + /** Gets the name of the method this node refers to. 
*/ + string getMethodName() { result = this.getMethod().getName() } +} + +/** + * A data-flow node performing a relational comparison using `<`, `<=`, `>` or `>=`. + */ +class RelationalComparisonNode extends BinaryOperationNode, ExprNode { + override RelationalComparisonExpr expr; + + /** Holds if this comparison evaluates to `outcome` iff `lesser <= greater + bias`. */ + predicate leq(boolean outcome, Node lesser, Node greater, int bias) { + outcome = true and + lesser = exprNode(expr.getLesserOperand()) and + greater = exprNode(expr.getGreaterOperand()) and + (if expr.isStrict() then bias = -1 else bias = 0) + or + outcome = false and + lesser = exprNode(expr.getGreaterOperand()) and + greater = exprNode(expr.getLesserOperand()) and + (if expr.isStrict() then bias = 0 else bias = -1) + } +} + +/** + * A data-flow node performing an equality test using `==` or `!=`. + */ +class EqualityTestNode extends BinaryOperationNode, ExprNode { + override EqualityTestExpr expr; + + /** Holds if this comparison evaluates to `outcome` iff `lhs == rhs`. */ + predicate eq(boolean outcome, Node lhs, Node rhs) { + outcome = expr.getPolarity() and + expr.hasOperands(lhs.asExpr(), rhs.asExpr()) + } + + /** Gets the polarity of this equality test, that is, `true` for `==` and `false` for `!=`. */ + boolean getPolarity() { result = expr.getPolarity() } +} + +/** + * A data-flow node performing a type cast using either a type conversion + * or an assertion. + */ +class TypeCastNode extends ExprNode { + TypeCastNode() { + expr instanceof TypeAssertExpr + or + expr instanceof ConversionExpr + } + + /** + * Gets the type being converted to. Note this differs from `this.getType()` for + * `TypeAssertExpr`s that return a (result, ok) tuple. + */ + Type getResultType() { + if this.getType() instanceof TupleType + then result = this.getType().(TupleType).getComponentType(0) + else result = this.getType() + } + + /** Gets the operand of the type cast. 
*/ + DataFlow::Node getOperand() { + result.asExpr() = expr.(TypeAssertExpr).getExpr() + or + result.asExpr() = expr.(ConversionExpr).getOperand() + } +} + +/** + * A data-flow node representing an element of an array, map, slice or string defined from a `range` statement. + * + * Example: in `for _, x := range y { ... }`, this represents the `Node` that extracts the element from the + * range statement, which will flow to `x`. + */ +class RangeElementNode extends Node { + DataFlow::Node base; + IR::ExtractTupleElementInstruction extract; + + RangeElementNode() { + this.asInstruction() = extract and + extract.extractsElement(_, 1) and + extract.getBase().(IR::GetNextEntryInstruction).getDomain() = base.asInstruction() + } + + /** Gets the data-flow node representing the base from which the element is read. */ + DataFlow::Node getBase() { result = base } +} + +/** + * Holds if `node` reads an element from `base`, either via an element-read (`base[y]`) expression + * or via a range statement `_, node := range base`. + */ +predicate readsAnElement(DataFlow::Node node, DataFlow::Node base) { + node.(ElementReadNode).readsElement(base, _) or + node.(RangeElementNode).getBase() = base +} + +/** + * A model of a function specifying that the function copies input values from + * a parameter or qualifier to a result. + * + * Note that this only models verbatim copying. Flow that does not preserve exact + * values should be modeled by `TaintTracking::FunctionModel` instead. + */ +abstract class FunctionModel extends Function { + /** Holds if data flows through this function from `input` to `output`. */ + abstract predicate hasDataFlow(FunctionInput input, FunctionOutput output); + + /** Gets an input node for this model for the call `c`. */ + DataFlow::Node getAnInputNode(DataFlow::CallNode c) { this.flowStepForCall(result, _, c) } + + /** Gets an output node for this model for the call `c`. 
*/ + DataFlow::Node getAnOutputNode(DataFlow::CallNode c) { this.flowStepForCall(_, result, c) } + + /** Holds if this function model causes data to flow from `pred` to `succ` for the call `c`. */ + predicate flowStepForCall(DataFlow::Node pred, DataFlow::Node succ, DataFlow::CallNode c) { + c = this.getACall() and + exists(FunctionInput inp, FunctionOutput outp | this.hasDataFlow(inp, outp) | + pred = inp.getNode(c) and + succ = outp.getNode(c) + ) + } + + /** Holds if this function model causes data to flow from `pred` to `succ`. */ + predicate flowStep(DataFlow::Node pred, DataFlow::Node succ) { + this.flowStepForCall(pred, succ, _) + } +} + +/** + * Gets the `Node` corresponding to `insn`. + */ +InstructionNode instructionNode(IR::Instruction insn) { result = MkInstructionNode(insn) } + +/** + * Gets the `Node` corresponding to `e`. + */ +ExprNode exprNode(Expr e) { result.asExpr() = e.stripParens() } + +/** + * Gets the `Node` corresponding to the value of `p` at function entry. + */ +ParameterNode parameterNode(Parameter p) { result.asParameter() = p } + +/** + * Gets the `Node` corresponding to the value of `r` at function entry. + */ +ReceiverNode receiverNode(ReceiverVariable r) { result.asReceiverVariable() = r } + +/** + * Gets the data-flow node corresponding to SSA variable `v`. + */ +SsaNode ssaNode(SsaVariable v) { result.getDefinition() = v.getDefinition() } + +/** + * Gets the data-flow node corresponding to the `i`th element of tuple `t` (which is either a call + * with multiple results, an iterator in a range loop, or the result of a type assertion). + */ +Node extractTupleElement(Node t, int i) { + exists(IR::Instruction insn | t = instructionNode(insn) | + result = instructionNode(IR::extractTupleElement(insn, i)) + ) +} + +/** + * Holds if `node` refers to a value returned alongside a non-nil error value. 
+ * + * For example, `0` in `func tryGetInt() (int, error) { return 0, errors.New("no good") }` + */ +predicate isReturnedWithError(Node node) { + exists(ReturnStmt ret, int nodeArg, int errorArg | + ret.getExpr(nodeArg) = node.asExpr() and + nodeArg != errorArg and + ret.getExpr(errorArg).getType() instanceof ErrorType + // That last condition implies ret.getExpr(errorArg) is non-nil, since nil doesn't implement `error` + ) +} + +/** + * Holds if data flows from `nodeFrom` to `nodeTo` in exactly one local + * (intra-procedural) step. + */ +predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFrom, nodeTo) } + +/** + * Holds if data flows from `nodeFrom` to `nodeTo` in exactly one local + * (intra-procedural) step, not taking function models into account. + */ +private predicate basicLocalFlowStep(Node nodeFrom, Node nodeTo) { + // Instruction -> Instruction + exists(Expr pred, Expr succ | + succ.(LogicalBinaryExpr).getAnOperand() = pred or + succ.(ConversionExpr).getOperand() = pred + | + nodeFrom = exprNode(pred) and + nodeTo = exprNode(succ) + ) + or + // Type assertion: if in the context `checked, ok := e.(*Type)` (in which + // case tuple-extraction instructions exist), flow from `e` to `e.(*Type)[0]`; + // otherwise flow from `e` to `e.(*Type)`. 
+ exists(IR::Instruction evalAssert, TypeAssertExpr assert | + nodeFrom.asExpr() = assert.getExpr() and + evalAssert = IR::evalExprInstruction(assert) and + if exists(IR::extractTupleElement(evalAssert, _)) + then nodeTo.asInstruction() = IR::extractTupleElement(evalAssert, 0) + else nodeTo.asInstruction() = evalAssert + ) + or + // Instruction -> SSA + exists(IR::Instruction pred, SsaExplicitDefinition succ | + succ.getRhs() = pred and + nodeFrom = MkInstructionNode(pred) and + nodeTo = MkSsaNode(succ) + ) + or + // SSA -> SSA + exists(SsaDefinition pred, SsaDefinition succ | + succ.(SsaVariableCapture).getSourceVariable() = pred.(SsaExplicitDefinition).getSourceVariable() or + succ.(SsaPseudoDefinition).getAnInput() = pred + | + nodeFrom = MkSsaNode(pred) and + nodeTo = MkSsaNode(succ) + ) + or + // SSA -> Instruction + exists(SsaDefinition pred, IR::Instruction succ | + succ = pred.getVariable().getAUse() and + nodeFrom = MkSsaNode(pred) and + nodeTo = MkInstructionNode(succ) + ) + or + // GlobalFunctionNode -> use + nodeFrom = MkGlobalFunctionNode(nodeTo.asExpr().(FunctionName).getTarget()) +} + +/** + * INTERNAL: do not use. + * + * This is the local flow predicate that's used as a building block in global + * data flow. It may have less flow than the `localFlowStep` predicate. + */ +cached +predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) { + basicLocalFlowStep(nodeFrom, nodeTo) + or + // step through function model + any(FunctionModel m).flowStep(nodeFrom, nodeTo) +} + +/** + * Holds if data flows from `source` to `sink` in zero or more local + * (intra-procedural) steps. + */ +predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) } + +/** + * A guard that validates some expression. + * + * To use this in a configuration, extend the class and provide a + * characteristic predicate precisely specifying the guard, and override + * `checks` to specify what is being validated and in which branch. 
+ * + * When using a data-flow or taint-flow configuration `cfg`, it is important + * that any classes extending BarrierGuard in scope which are not used in `cfg` + * are disjoint from any classes extending BarrierGuard in scope which are used + * in `cfg`. + */ +abstract class BarrierGuard extends Node { + /** Holds if this guard validates `e` upon evaluating to `branch`. */ + abstract predicate checks(Expr e, boolean branch); + + /** Gets a node guarded by this guard. */ + final Node getAGuardedNode() { + exists(ControlFlow::ConditionGuardNode guard, Node nd, SsaWithFields var | + result = var.getAUse() + | + this.guards(guard, nd, var) and + guard.dominates(result.getBasicBlock()) + ) + } + + /** + * Holds if `guard` marks a point in the control-flow graph where this node + * is known to validate `nd`, which is represented by `ap`. + * + * This predicate exists to enforce a good join order in `getAGuardedNode`. + */ + pragma[noinline] + private predicate guards(ControlFlow::ConditionGuardNode guard, Node nd, SsaWithFields ap) { + this.guards(guard, nd) and nd = ap.getAUse() + } + + /** + * Holds if `guard` marks a point in the control-flow graph where this node + * is known to validate `nd`. + */ + private predicate guards(ControlFlow::ConditionGuardNode guard, Node nd) { + exists(boolean branch | + this.checks(nd.asExpr(), branch) and + guard.ensures(this, branch) + ) + or + exists( + Function f, FunctionInput inp, FunctionOutput outp, DataFlow::Property p, CallNode c, + Node resNode, Node check, boolean outcome + | + guardingFunction(f, inp, outp, p) and + c = f.getACall() and + nd = inp.getNode(c) and + localFlow(pragma[only_bind_into](outp.getNode(c)), resNode) and + p.checkOn(check, outcome, resNode) and + guard.ensures(pragma[only_bind_into](check), outcome) + ) + } + + /** + * Holds if whenever `p` holds of output `outp` of function `f`, this node + * is known to validate the input `inp` of `f`. 
+ * + * We check this by looking for guards on `inp` that dominate a `return` statement that + * is the only `return` in `f` that can return `true`. This means that if `f` returns `true`, + * the guard must have been satisfied. (Similar reasoning is applied for statements returning + * `false`, `nil` or a non-`nil` value.) + */ + private predicate guardingFunction( + Function f, FunctionInput inp, FunctionOutput outp, DataFlow::Property p + ) { + exists(FuncDecl fd, Node arg, Node ret | + fd.getFunction() = f and + localFlow(inp.getExitNode(fd), arg) and + ret = outp.getEntryNode(fd) and + ( + // Case: a function like "if someBarrierGuard(arg) { return true } else { return false }" + exists(ControlFlow::ConditionGuardNode guard | + this.guards(guard, arg) and + guard.dominates(ret.getBasicBlock()) + | + exists(boolean b | + onlyPossibleReturnOfBool(fd, outp, ret, b) and + p.isBoolean(b) + ) + or + onlyPossibleReturnOfNonNil(fd, outp, ret) and + p.isNonNil() + or + onlyPossibleReturnOfNil(fd, outp, ret) and + p.isNil() + ) + or + // Case: a function like "return someBarrierGuard(arg)" + // or "return !someBarrierGuard(arg) && otherCond(...)" + exists(boolean outcome | + ret = getUniqueOutputNode(fd, outp) and + this.checks(arg.asExpr(), outcome) and + // This predicate's contract is (p holds of ret ==> arg is checked), + // (and we have (this has outcome ==> arg is checked)) + // but p.checkOn(ret, outcome, this) gives us (ret has outcome ==> p holds of this), + // so we need to swap outcome and (specifically boolean) p: + DataFlow::booleanProperty(outcome).checkOn(ret, p.asBoolean(), this) + ) + or + // Case: a function like "return guardProxy(arg)" + // or "return !guardProxy(arg) || otherCond(...)" + exists( + Function f2, FunctionInput inp2, FunctionOutput outp2, CallNode c, + DataFlow::Property outpProp + | + ret = getUniqueOutputNode(fd, outp) and + this.guardingFunction(f2, inp2, outp2, outpProp) and + c = f2.getACall() and + arg = inp2.getNode(c) and + ( + 
// See comment above ("This predicate's contract...") for rationale re: the inversion of + // `p` and `outpProp` here: + outpProp.checkOn(ret, p.asBoolean(), outp2.getNode(c)) + or + // The particular case where p is non-boolean (i.e., nil or non-nil), and we directly return `c`: + outpProp = p and ret = outp2.getNode(c) + ) + ) + ) + ) + } +} + +DataFlow::Node getUniqueOutputNode(FuncDecl fd, FunctionOutput outp) { + result = unique(DataFlow::Node n | n = outp.getEntryNode(fd) | n) +} + +/** + * Holds if `ret` is a data-flow node whose value contributes to the output `res` of `fd`, + * and that node may have Boolean value `b`. + */ +predicate possiblyReturnsBool(FuncDecl fd, FunctionOutput res, Node ret, Boolean b) { + ret = res.getEntryNode(fd) and + ret.getType().getUnderlyingType() instanceof BoolType and + not ret.getBoolValue() != b +} + +/** + * Holds if `ret` is the only data-flow node whose value contributes to the output `res` of `fd` + * that may have Boolean value `b`, since all the other output nodes have a Boolean value + * other than `b`. + */ +private predicate onlyPossibleReturnOfBool(FuncDecl fd, FunctionOutput res, Node ret, boolean b) { + possiblyReturnsBool(fd, res, ret, b) and + forall(Node otherRet | otherRet = res.getEntryNode(fd) and otherRet != ret | + otherRet.getBoolValue() != b + ) +} + +/** + * Holds if `ret` is a data-flow node whose value contributes to the output `res` of `fd`, + * and that node may evaluate to a value other than `nil`. + */ +predicate possiblyReturnsNonNil(FuncDecl fd, FunctionOutput res, Node ret) { + ret = res.getEntryNode(fd) and + not ret.asExpr() = Builtin::nil().getAReference() +} + +/** + * Holds if `ret` is the only data-flow node whose value contributes to the output `res` of `fd` + * that may have a value other than `nil`, since all the other output nodes evaluate to `nil`. 
+ */ +private predicate onlyPossibleReturnOfNonNil(FuncDecl fd, FunctionOutput res, Node ret) { + possiblyReturnsNonNil(fd, res, ret) and + forall(Node otherRet | otherRet = res.getEntryNode(fd) and otherRet != ret | + otherRet.asExpr() = Builtin::nil().getAReference() + ) +} + +/** + * Holds if function `f`'s result `output`, which must be a return value, cannot be nil. + */ +private predicate certainlyReturnsNonNil(Function f, FunctionOutput output) { + output.isResult(_) and + ( + f.hasQualifiedName("errors", "New") + or + f.hasQualifiedName("fmt", "Errorf") + or + f in [Builtin::new(), Builtin::make()] + or + exists(FuncDecl fd | fd = f.getFuncDecl() | + forex(DataFlow::Node ret | ret = output.getEntryNode(fd) | isCertainlyNotNil(ret)) + ) + ) +} + +/** + * Holds if `node` cannot be `nil`. + */ +private predicate isCertainlyNotNil(DataFlow::Node node) { + node instanceof DataFlow::AddressOperationNode + or + exists(DataFlow::CallNode c, FunctionOutput output | output.getExitNode(c) = node | + certainlyReturnsNonNil(c.getTarget(), output) + ) +} + +/** + * Holds if `ret` is the only data-flow node whose value contributes to the output `res` of `fd` + * that returns `nil`, since all the other output nodes are known to be non-nil. + */ +private predicate onlyPossibleReturnOfNil(FuncDecl fd, FunctionOutput res, DataFlow::Node ret) { + ret = res.getEntryNode(fd) and + ret.asExpr() = Builtin::nil().getAReference() and + forall(DataFlow::Node otherRet | otherRet = res.getEntryNode(fd) and otherRet != ret | + isCertainlyNotNil(otherRet) + ) +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/TaintTrackingUtil.qll b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/TaintTrackingUtil.qll new file mode 100644 index 00000000000..d5394a578f9 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/TaintTrackingUtil.qll @@ -0,0 +1,384 @@ +/** + * Provides Go-specific definitions for use in the taint-tracking library. 
+ */ + +private import go + +/** + * Holds if taint can flow from `src` to `sink` in zero or more + * local (intra-procedural) steps. + */ +predicate localTaint(DataFlow::Node src, DataFlow::Node sink) { localTaintStep*(src, sink) } + +/** + * Holds if taint can flow from `src` to `sink` in zero or more + * local (intra-procedural) steps. + */ +predicate localExprTaint(Expr src, Expr sink) { + localTaint(DataFlow::exprNode(src), DataFlow::exprNode(sink)) +} + +/** + * Holds if taint can flow in one local step from `src` to `sink`. + */ +predicate localTaintStep(DataFlow::Node src, DataFlow::Node sink) { + DataFlow::localFlowStep(src, sink) or + localAdditionalTaintStep(src, sink) +} + +private newtype TUnit = TMkUnit() + +/** A singleton class containing a single dummy "unit" value. */ +private class Unit extends TUnit { + /** Gets a textual representation of this element. */ + string toString() { result = "unit" } +} + +/** + * A unit class for adding additional taint steps. + * + * Extend this class to add additional taint steps that should apply to all + * taint configurations. + */ +class AdditionalTaintStep extends Unit { + /** + * Holds if the step from `node1` to `node2` should be considered a taint + * step for all configurations. + */ + abstract predicate step(DataFlow::Node node1, DataFlow::Node node2); +} + +/** + * Holds if the additional step from `pred` to `succ` should be included in all + * global taint flow configurations. + */ +predicate localAdditionalTaintStep(DataFlow::Node pred, DataFlow::Node succ) { + referenceStep(pred, succ) or + elementWriteStep(pred, succ) or + fieldReadStep(pred, succ) or + elementStep(pred, succ) or + tupleStep(pred, succ) or + stringConcatStep(pred, succ) or + sliceStep(pred, succ) or + any(FunctionModel fm).taintStep(pred, succ) or + any(AdditionalTaintStep a).step(pred, succ) +} + +/** + * Holds if taint flows from `pred` to `succ` via a reference or dereference. 
+ * + * The taint-tracking library does not distinguish between a reference and its referent, + * treating one as tainted if the other is. + */ +predicate referenceStep(DataFlow::Node pred, DataFlow::Node succ) { + exists(DataFlow::AddressOperationNode addr | + // from `x` to `&x` + pred = addr.getOperand() and + succ = addr + or + // from `&x` to `x` + pred = addr and + succ.(DataFlow::PostUpdateNode).getPreUpdateNode() = addr.getOperand() + ) + or + exists(DataFlow::PointerDereferenceNode deref | + // from `x` to `*x` + pred = deref.getOperand() and + succ = deref + or + // from `*x` to `x` + pred = deref and + succ.(DataFlow::PostUpdateNode).getPreUpdateNode() = deref.getOperand() + ) +} + +/** + * Holds if there is an assignment of the form `succ[idx] = pred`, meaning that `pred` may taint + * `succ`. + */ +predicate elementWriteStep(DataFlow::Node pred, DataFlow::Node succ) { + any(DataFlow::Write w).writesElement(succ.(DataFlow::PostUpdateNode).getPreUpdateNode(), _, pred) +} + +/** Holds if taint flows from `pred` to `succ` via a field read. */ +predicate fieldReadStep(DataFlow::Node pred, DataFlow::Node succ) { + succ.(DataFlow::FieldReadNode).getBase() = pred +} + +/** + * Holds if taint flows from `pred` to `succ` via an array, map, slice, or string + * index operation. + */ +predicate elementStep(DataFlow::Node pred, DataFlow::Node succ) { + succ.(DataFlow::ElementReadNode).getBase() = pred + or + exists(IR::GetNextEntryInstruction nextEntry | + pred.asInstruction() = nextEntry.getDomain() and + // only step into the value, not the index + succ.asInstruction() = IR::extractTupleElement(nextEntry, 1) + ) +} + +deprecated predicate arrayStep = elementStep/2; + +/** Holds if taint flows from `pred` to `succ` via an extract tuple operation. */ +predicate tupleStep(DataFlow::Node pred, DataFlow::Node succ) { + succ = DataFlow::extractTupleElement(pred, _) +} + +/** Holds if taint flows from `pred` to `succ` via string concatenation. 
*/ +predicate stringConcatStep(DataFlow::Node pred, DataFlow::Node succ) { + exists(DataFlow::BinaryOperationNode conc | + conc.getOperator() = "+" and conc.getType() instanceof StringType + | + succ = conc and conc.getAnOperand() = pred + ) +} + +/** Holds if taint flows from `pred` to `succ` via a slice operation. */ +predicate sliceStep(DataFlow::Node pred, DataFlow::Node succ) { + succ.(DataFlow::SliceNode).getBase() = pred +} + +/** + * A model of a function specifying that the function propagates taint from + * a parameter or qualifier to a result. + */ +abstract class FunctionModel extends Function { + /** Holds if taint propagates through this function from `input` to `output`. */ + abstract predicate hasTaintFlow(FunctionInput input, FunctionOutput output); + + /** Gets an input node for this model for the call `c`. */ + DataFlow::Node getAnInputNode(DataFlow::CallNode c) { this.taintStepForCall(result, _, c) } + + /** Gets an output node for this model for the call `c`. */ + DataFlow::Node getAnOutputNode(DataFlow::CallNode c) { this.taintStepForCall(_, result, c) } + + /** Holds if this function model causes taint to flow from `pred` to `succ` for the call `c`. */ + predicate taintStepForCall(DataFlow::Node pred, DataFlow::Node succ, DataFlow::CallNode c) { + c = this.getACall() and + exists(FunctionInput inp, FunctionOutput outp | this.hasTaintFlow(inp, outp) | + pred = inp.getNode(c) and + succ = outp.getNode(c) + ) + } + + /** Holds if this function model causes taint to flow from `pred` to `succ`. */ + predicate taintStep(DataFlow::Node pred, DataFlow::Node succ) { + this.taintStepForCall(pred, succ, _) + } +} + +/** + * Holds if the additional step from `src` to `sink` should be included in all + * global taint flow configurations. + */ +predicate defaultAdditionalTaintStep(DataFlow::Node src, DataFlow::Node sink) { + localAdditionalTaintStep(src, sink) +} + +/** + * A sanitizer in all global taint flow configurations but not in local taint. 
+ */ +abstract class DefaultTaintSanitizer extends DataFlow::Node { } + +/** + * Holds if `node` should be a sanitizer in all global taint flow configurations + * but not in local taint. + */ +predicate defaultTaintSanitizer(DataFlow::Node node) { node instanceof DefaultTaintSanitizer } + +/** + * A sanitizer guard in all global taint flow configurations but not in local taint. + */ +abstract class DefaultTaintSanitizerGuard extends DataFlow::BarrierGuard { } + +/** + * Holds if `guard` should be a sanitizer guard in all global taint flow configurations + * but not in local taint. + */ +predicate defaultTaintSanitizerGuard(DataFlow::BarrierGuard guard) { + guard instanceof DefaultTaintSanitizerGuard +} + +/** + * An equality test acting as a sanitizer guard for `nonConstNode` by + * restricting it to a known value. + * + * Note that comparisons to `nil` are excluded. This is needed for performance + * reasons. + */ +class EqualityTestGuard extends DefaultTaintSanitizerGuard, DataFlow::EqualityTestNode { + DataFlow::Node nonConstNode; + + EqualityTestGuard() { + this.getAnOperand().isConst() and + nonConstNode = this.getAnOperand() and + not nonConstNode.isConst() and + not this.getAnOperand() = Builtin::nil().getARead() + } + + override predicate checks(Expr e, boolean outcome) { + e = nonConstNode.asExpr() and + outcome = this.getPolarity() + } +} + +/** + * Holds if data flows from `node` to `switchExprNode`, which is the expression + * of a switch statement. + */ +private predicate flowsToSwitchExpression(DataFlow::Node node, DataFlow::Node switchExprNode) { + switchExprNode.asExpr() = any(ExpressionSwitchStmt ess).getExpr() and + DataFlow::localFlow(node, switchExprNode) +} + +/** + * Holds if `inputNode` is the exit node of a parameter to `fd` and data flows + * from `inputNode` to the expression of a switch statement. 
+ */ +private predicate isPossibleInputNode(DataFlow::Node inputNode, FuncDef fd) { + inputNode = any(FunctionInput inp | inp.isParameter(_)).getExitNode(fd) and + flowsToSwitchExpression(inputNode, _) +} + +/** + * Gets a predecessor of `succ` without following edges corresponding to + * passing a constant case test in a switch statement which is switching on + * an expression which data flows to from `inputNode`. + */ +private ControlFlow::Node getANonTestPassingPredecessor( + ControlFlow::Node succ, DataFlow::Node inputNode +) { + isPossibleInputNode(inputNode, succ.getRoot().(FuncDef)) and + result = succ.getAPredecessor() and + not exists(Expr testExpr, DataFlow::Node switchExprNode | + flowsToSwitchExpression(inputNode, switchExprNode) and + ControlFlow::isSwitchCaseTestPassingEdge(result, succ, switchExprNode.asExpr(), testExpr) and + testExpr.isConst() + ) +} + +private ControlFlow::Node getANonTestPassingReachingNodeRecursive( + ControlFlow::Node n, DataFlow::Node inputNode +) { + isPossibleInputNode(inputNode, n.getRoot().(FuncDef)) and + ( + result = n or + result = + getANonTestPassingReachingNodeRecursive(getANonTestPassingPredecessor(n, inputNode), inputNode) + ) +} + +/** + * Gets a node by following predecessors from `ret` without following edges + * corresponding to passing a constant case test in a switch statement which is + * switching on an expression which data flows to from `inputNode`. + */ +private ControlFlow::Node getANonTestPassingReachingNodeBase( + IR::ReturnInstruction ret, DataFlow::Node inputNode +) { + result = getANonTestPassingReachingNodeRecursive(ret, inputNode) +} + +/** + * Holds if every way to get from the entry node of the function to `ret` + * involves passing a constant case test in a switch statement which is + * switching on an expression which data flows to from `inputNode`.
+ */ +private predicate mustPassConstantCaseTestToReach( + IR::ReturnInstruction ret, DataFlow::Node inputNode +) { + isPossibleInputNode(inputNode, ret.getRoot().(FuncDef)) and + not exists(ControlFlow::Node entry | entry = ret.getRoot().getEntryNode() | + entry = getANonTestPassingReachingNodeBase(ret, inputNode) + ) +} + +/** + * Holds if whenever `outp` of function `f` satisfies `p`, the input `inp` of + * `f` matched a constant in a case clause of a switch statement. + * + * We check this by looking for guards on `inp` that collectively dominate all + * the `return` statements in `f` that can return `true`. This means that if + * `f` returns `true`, one of the guards must have been satisfied. (Similar + * reasoning is applied for statements returning `false`, `nil` or a non-`nil` + * value.) + */ +predicate functionEnsuresInputIsConstant( + Function f, FunctionInput inp, FunctionOutput outp, DataFlow::Property p +) { + exists(FuncDecl fd | fd.getFunction() = f | + exists(boolean b | + p.isBoolean(b) and + forex(DataFlow::Node ret, IR::ReturnInstruction ri | + ret = outp.getEntryNode(fd) and + ri.getReturnStmt().getAnExpr() = ret.asExpr() and + DataFlow::possiblyReturnsBool(fd, outp, ret, b) + | + mustPassConstantCaseTestToReach(ri, inp.getExitNode(fd)) + ) + ) + or + p.isNonNil() and + forex(DataFlow::Node ret, IR::ReturnInstruction ri | + ret = outp.getEntryNode(fd) and + ri.getReturnStmt().getAnExpr() = ret.asExpr() and + DataFlow::possiblyReturnsNonNil(fd, outp, ret) + | + mustPassConstantCaseTestToReach(ri, inp.getExitNode(fd)) + ) + or + p.isNil() and + forex(DataFlow::Node ret, IR::ReturnInstruction ri | + ret = outp.getEntryNode(fd) and + ri.getReturnStmt().getAnExpr() = ret.asExpr() and + ret.asExpr() = Builtin::nil().getAReference() + | + exists(DataFlow::Node exprNode | + DataFlow::localFlow(inp.getExitNode(fd), exprNode) and + mustPassConstantCaseTestToReach(ri, inp.getExitNode(fd)) + ) + ) + ) +} + +/** + * Holds if whenever `outputNode` satisfies 
`p`, `inputNode` matched a constant + * in a case clause of a switch statement. + */ +pragma[noinline] +predicate inputIsConstantIfOutputHasProperty( + DataFlow::Node inputNode, DataFlow::Node outputNode, DataFlow::Property p +) { + exists(Function f, FunctionInput inp, FunctionOutput outp, DataFlow::CallNode call | + functionEnsuresInputIsConstant(f, inp, outp, p) and + call = f.getACall() and + inputNode = inp.getNode(call) and + DataFlow::localFlow(outp.getNode(call), outputNode) + ) +} + +/** + * A comparison against a list of constants, acting as a sanitizer guard for + * `guardedExpr` by restricting it to a known value. + * + * Currently this only looks for functions containing a switch statement, but + * it could equally look for a check for membership of a constant map or + * constant array, which does not need to be in its own function. + */ +class ListOfConstantsComparisonSanitizerGuard extends TaintTracking::DefaultTaintSanitizerGuard { + DataFlow::Node guardedExpr; + boolean outcome; + + ListOfConstantsComparisonSanitizerGuard() { + exists(DataFlow::Node outputNode, DataFlow::Property p | + inputIsConstantIfOutputHasProperty(guardedExpr, outputNode, p) and + p.checkOn(this, outcome, outputNode) + ) + } + + override predicate checks(Expr e, boolean branch) { + e = guardedExpr.asExpr() and branch = outcome + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/tainttracking1/TaintTrackingImpl.qll b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/tainttracking1/TaintTrackingImpl.qll new file mode 100644 index 00000000000..450a9ba7e1f --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/tainttracking1/TaintTrackingImpl.qll @@ -0,0 +1,117 @@ +/** + * Provides an implementation of global (interprocedural) taint tracking. 
+ * This file re-exports the local (intraprocedural) taint-tracking analysis + * from `TaintTrackingParameter::Public` and adds a global analysis, mainly + * exposed through the `Configuration` class. For some languages, this file + * exists in several identical copies, allowing queries to use multiple + * `Configuration` classes that depend on each other without introducing + * mutual recursion among those configurations. + */ + +import TaintTrackingParameter::Public +private import TaintTrackingParameter::Private + +/** + * A configuration of interprocedural taint tracking analysis. This defines + * sources, sinks, and any other configurable aspect of the analysis. Each + * use of the taint tracking library must define its own unique extension of + * this abstract class. + * + * A taint-tracking configuration is a special data flow configuration + * (`DataFlow::Configuration`) that allows for flow through nodes that do not + * necessarily preserve values but are still relevant from a taint tracking + * perspective. (For example, string concatenation, where one of the operands + * is tainted.) + * + * To create a configuration, extend this class with a subclass whose + * characteristic predicate is a unique singleton string. For example, write + * + * ```ql + * class MyAnalysisConfiguration extends TaintTracking::Configuration { + * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" } + * // Override `isSource` and `isSink`. + * // Optionally override `isSanitizer`. + * // Optionally override `isSanitizerIn`. + * // Optionally override `isSanitizerOut`. + * // Optionally override `isSanitizerGuard`. + * // Optionally override `isAdditionalTaintStep`. 
+ * } + * ``` + * + * Then, to query whether there is flow between some `source` and `sink`, + * write + * + * ```ql + * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink)) + * ``` + * + * Multiple configurations can coexist, but it is unsupported to depend on + * another `TaintTracking::Configuration` or a `DataFlow::Configuration` in the + * overridden predicates that define sources, sinks, or additional steps. + * Instead, the dependency should go to a `TaintTracking2::Configuration` or a + * `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc. + */ +abstract class Configuration extends DataFlow::Configuration { + bindingset[this] + Configuration() { any() } + + /** + * Holds if `source` is a relevant taint source. + * + * The smaller this predicate is, the faster `hasFlow()` will converge. + */ + // overridden to provide taint-tracking specific qldoc + abstract override predicate isSource(DataFlow::Node source); + + /** + * Holds if `sink` is a relevant taint sink. + * + * The smaller this predicate is, the faster `hasFlow()` will converge. + */ + // overridden to provide taint-tracking specific qldoc + abstract override predicate isSink(DataFlow::Node sink); + + /** Holds if the node `node` is a taint sanitizer. */ + predicate isSanitizer(DataFlow::Node node) { none() } + + final override predicate isBarrier(DataFlow::Node node) { + isSanitizer(node) or + defaultTaintSanitizer(node) + } + + /** Holds if taint propagation into `node` is prohibited. */ + predicate isSanitizerIn(DataFlow::Node node) { none() } + + final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) } + + /** Holds if taint propagation out of `node` is prohibited. */ + predicate isSanitizerOut(DataFlow::Node node) { none() } + + final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) } + + /** Holds if taint propagation through nodes guarded by `guard` is prohibited. 
*/ + predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() } + + final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { + isSanitizerGuard(guard) or defaultTaintSanitizerGuard(guard) + } + + /** + * Holds if the additional taint propagation step from `node1` to `node2` + * must be taken into account in the analysis. + */ + predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() } + + final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { + isAdditionalTaintStep(node1, node2) or + defaultAdditionalTaintStep(node1, node2) + } + + /** + * Holds if taint may flow from `source` to `sink` for this configuration. + */ + // overridden to provide taint-tracking specific qldoc + override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) { + super.hasFlow(source, sink) + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/tainttracking1/TaintTrackingParameter.qll b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/tainttracking1/TaintTrackingParameter.qll new file mode 100644 index 00000000000..b38482194ec --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/tainttracking1/TaintTrackingParameter.qll @@ -0,0 +1,5 @@ +import semmle.go.dataflow.internal.TaintTrackingUtil as Public + +module Private { + import semmle.go.dataflow.DataFlow::DataFlow as DataFlow +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/tainttracking2/TaintTrackingImpl.qll b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/tainttracking2/TaintTrackingImpl.qll new file mode 100644 index 00000000000..450a9ba7e1f --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/tainttracking2/TaintTrackingImpl.qll @@ -0,0 +1,117 @@ +/** + * Provides an implementation of global (interprocedural) taint tracking. 
+ * This file re-exports the local (intraprocedural) taint-tracking analysis + * from `TaintTrackingParameter::Public` and adds a global analysis, mainly + * exposed through the `Configuration` class. For some languages, this file + * exists in several identical copies, allowing queries to use multiple + * `Configuration` classes that depend on each other without introducing + * mutual recursion among those configurations. + */ + +import TaintTrackingParameter::Public +private import TaintTrackingParameter::Private + +/** + * A configuration of interprocedural taint tracking analysis. This defines + * sources, sinks, and any other configurable aspect of the analysis. Each + * use of the taint tracking library must define its own unique extension of + * this abstract class. + * + * A taint-tracking configuration is a special data flow configuration + * (`DataFlow::Configuration`) that allows for flow through nodes that do not + * necessarily preserve values but are still relevant from a taint tracking + * perspective. (For example, string concatenation, where one of the operands + * is tainted.) + * + * To create a configuration, extend this class with a subclass whose + * characteristic predicate is a unique singleton string. For example, write + * + * ```ql + * class MyAnalysisConfiguration extends TaintTracking::Configuration { + * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" } + * // Override `isSource` and `isSink`. + * // Optionally override `isSanitizer`. + * // Optionally override `isSanitizerIn`. + * // Optionally override `isSanitizerOut`. + * // Optionally override `isSanitizerGuard`. + * // Optionally override `isAdditionalTaintStep`. 
+ * } + * ``` + * + * Then, to query whether there is flow between some `source` and `sink`, + * write + * + * ```ql + * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink)) + * ``` + * + * Multiple configurations can coexist, but it is unsupported to depend on + * another `TaintTracking::Configuration` or a `DataFlow::Configuration` in the + * overridden predicates that define sources, sinks, or additional steps. + * Instead, the dependency should go to a `TaintTracking2::Configuration` or a + * `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc. + */ +abstract class Configuration extends DataFlow::Configuration { + bindingset[this] + Configuration() { any() } + + /** + * Holds if `source` is a relevant taint source. + * + * The smaller this predicate is, the faster `hasFlow()` will converge. + */ + // overridden to provide taint-tracking specific qldoc + abstract override predicate isSource(DataFlow::Node source); + + /** + * Holds if `sink` is a relevant taint sink. + * + * The smaller this predicate is, the faster `hasFlow()` will converge. + */ + // overridden to provide taint-tracking specific qldoc + abstract override predicate isSink(DataFlow::Node sink); + + /** Holds if the node `node` is a taint sanitizer. */ + predicate isSanitizer(DataFlow::Node node) { none() } + + final override predicate isBarrier(DataFlow::Node node) { + isSanitizer(node) or + defaultTaintSanitizer(node) + } + + /** Holds if taint propagation into `node` is prohibited. */ + predicate isSanitizerIn(DataFlow::Node node) { none() } + + final override predicate isBarrierIn(DataFlow::Node node) { isSanitizerIn(node) } + + /** Holds if taint propagation out of `node` is prohibited. */ + predicate isSanitizerOut(DataFlow::Node node) { none() } + + final override predicate isBarrierOut(DataFlow::Node node) { isSanitizerOut(node) } + + /** Holds if taint propagation through nodes guarded by `guard` is prohibited. 
*/ + predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() } + + final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { + isSanitizerGuard(guard) or defaultTaintSanitizerGuard(guard) + } + + /** + * Holds if the additional taint propagation step from `node1` to `node2` + * must be taken into account in the analysis. + */ + predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() } + + final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { + isAdditionalTaintStep(node1, node2) or + defaultAdditionalTaintStep(node1, node2) + } + + /** + * Holds if taint may flow from `source` to `sink` for this configuration. + */ + // overridden to provide taint-tracking specific qldoc + override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) { + super.hasFlow(source, sink) + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/tainttracking2/TaintTrackingParameter.qll b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/tainttracking2/TaintTrackingParameter.qll new file mode 100644 index 00000000000..1130c2e42e1 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/dataflow/internal/tainttracking2/TaintTrackingParameter.qll @@ -0,0 +1,5 @@ +import semmle.go.dataflow.internal.TaintTrackingUtil as Public + +module Private { + import semmle.go.dataflow.DataFlow2::DataFlow2 as DataFlow +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/dependencies/Dependencies.qll b/repo-tests/codeql-go/ql/lib/semmle/go/dependencies/Dependencies.qll new file mode 100644 index 00000000000..5fc746e6987 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/dependencies/Dependencies.qll @@ -0,0 +1,95 @@ +/** + * Provides classes for modeling go.mod dependencies. + */ + +import go + +/** + * An abstract representation of a dependency. + */ +abstract class Dependency extends Locatable { + /** + * Holds if this dependency has package path `path` and version `v`. 
+ * + * If the version cannot be determined, `v` is bound to the string + * `"unknown"`. + */ + abstract predicate info(string path, string v); + + /** Gets the package path of this dependency. */ + string getDepPath() { this.info(result, _) } + + /** Gets the version of this dependency. */ + string getDepVersion() { this.info(_, result) } + + /** + * Holds if this dependency is relevant for imports in file `file`. That is, an import of this + * dependency's path that is in `file` will use this dependency. + */ + abstract predicate relevantForFile(File file); + + /** + * Gets an import of this dependency. + */ + ImportSpec getAnImport() { + result.getPath().regexpMatch("\\Q" + this.getDepPath() + "\\E(/.*)?") and + this.relevantForFile(result.getFile()) + } +} + +/** + * A dependency from a go.mod file. + */ +class GoModDependency extends Dependency, GoModRequireLine { + override predicate info(string path, string v) { + this.replacementInfo(path, v) + or + not this.replacementInfo(_, _) and + this.originalInfo(path, v) + } + + override predicate relevantForFile(File file) { + exists(Folder parent | parent.getAFile() = this.getFile() | + parent.getAFolder*().getAFile() = file + ) + } + + /** + * Holds if there is a replace line that replaces this dependency with a dependency on `path`, + * version `v`. + */ + predicate replacementInfo(string path, string v) { + exists(GoModReplaceLine replace | + replace.getFile() = this.getFile() and + replace.getOriginalPath() = this.getPath() + | + path = replace.getReplacementPath() and + ( + v = replace.getReplacementVersion() + or + not exists(replace.getReplacementVersion()) and + v = "unknown" + ) + ) + } + + /** + * Gets a version that was excluded for this dependency.
+ */ + string getAnExcludedVersion() { + exists(GoModExcludeLine exclude | + exclude.getFile() = this.getFile() and + exclude.getPath() = this.getPath() + | + result = exclude.getVersion() + ) + } + + /** + * Holds if this require line originally states dependency `path` had version `v`. + * + * The actual info of this dependency can change based on `replace` directives in the same go.mod + * file, which replace a dependency with another one. + */ + predicate originalInfo(string path, string v) { path = this.getPath() and v = this.getVersion() } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/dependencies/SemVer.qll b/repo-tests/codeql-go/ql/lib/semmle/go/dependencies/SemVer.qll new file mode 100644 index 00000000000..88d37563931 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/dependencies/SemVer.qll @@ -0,0 +1,100 @@ +/** + * Provides classes for dealing with semantic versions, for dependency versions. + */ + +import semmle.go.dependencies.Dependencies + +/** + * A SemVer-formatted version string in a dependency. + * + * Pre-release information and build metadata is not yet supported. + */ +class DependencySemVer extends string { + Dependency dep; + string normalized; + + DependencySemVer() { + this = dep.getDepVersion() and + normalized = normalizeSemver(this) + } + + /** + * Holds if this version may be before `last`. + */ + bindingset[last] + predicate maybeBefore(string last) { normalized < normalizeSemver(last) } + + /** + * Holds if this version may be after `first`. + */ + bindingset[first] + predicate maybeAfter(string first) { normalizeSemver(first) < normalized } + + /** + * Holds if this version may be between `first` (inclusive) and `last` (exclusive). + */ + bindingset[first, last] + predicate maybeBetween(string first, string last) { + normalizeSemver(first) <= normalized and + normalized < normalizeSemver(last) + } + + /** + * Holds if this version is equivalent to `other`.
+ */ + bindingset[other] + predicate is(string other) { normalized = normalizeSemver(other) } + + /** + * Gets the dependency that uses this string. + */ + Dependency getDependency() { result = dep } +} + +bindingset[str] +private string leftPad(string str) { result = ("000" + str).suffix(str.length()) } + +/** + * Normalizes a SemVer string such that the lexicographical ordering + * of two normalized strings is consistent with the SemVer ordering. + * + * Pre-release information and build metadata is not yet supported. + */ +bindingset[orig] +private string normalizeSemver(string orig) { + exists(string pattern, string major, string minor, string patch | + pattern = "v?(\\d+)\\.(\\d+)\\.(\\d+)(\\D.*)?" and + major = orig.regexpCapture(pattern, 1) and + minor = orig.regexpCapture(pattern, 2) and + patch = orig.regexpCapture(pattern, 3) + | + result = leftPad(major) + "." + leftPad(minor) + "." + leftPad(patch) + ) +} + +/** + * A version string in a dependency that has a SemVer, but also contains a git commit SHA. + * + * This class is useful for interacting with go.mod versions, which use SemVer, but can also contain + * SHAs if no useful tags are found, or when a user wishes to specify a commit SHA. + * + * Pre-release information and build metadata is not yet supported. + */ +class DependencySemShaVersion extends DependencySemVer { + string sha; + + DependencySemShaVersion() { sha = this.regexpCapture(".*-([0-9a-f]+)", 1) } + + /** + * Gets the commit SHA associated with this version. 
+ */ + string getSha() { result = sha } + + bindingset[other] + override predicate is(string other) { + this.getSha() = other.(DependencySemShaVersion).getSha() + or + not other instanceof DependencySemShaVersion and + super.is(other) + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Beego.qll b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Beego.qll new file mode 100644 index 00000000000..bff7d27fb96 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Beego.qll @@ -0,0 +1,359 @@ +/** + * Provides classes for working with untrusted flow sources, sinks and taint propagators + * from the `github.com/beego/beego` package. + */ + +import go +import semmle.go.security.Xss +private import semmle.go.security.SafeUrlFlowCustomizations + +/** + * Provides classes for working with untrusted flow sources, sinks and taint propagators + * from the [Beego](`github.com/beego/beego`) package. + */ +module Beego { + /** Gets the module path `github.com/astaxie/beego` or `github.com/beego/beego`. */ + string modulePath() { result = ["github.com/astaxie/beego", "github.com/beego/beego"] } + + /** Gets the path for the root package of beego. */ + string packagePath() { result = package(modulePath(), "") } + + /** Gets the path for the context package of beego. */ + string contextPackagePath() { result = package(modulePath(), "context") } + + /** Gets the path for the logs package of beego. */ + string logsPackagePath() { result = package(modulePath(), "logs") } + + /** Gets the path for the utils package of beego. */ + string utilsPackagePath() { result = package(modulePath(), "utils") } + + /** + * `BeegoInput` sources of untrusted data. 
+ */ + private class BeegoInputSource extends UntrustedFlowSource::Range { + string methodName; + FunctionOutput output; + + BeegoInputSource() { + exists(DataFlow::MethodCallNode c | this = output.getExitNode(c) | + c.getTarget().hasQualifiedName(contextPackagePath(), "BeegoInput", methodName) + ) and + ( + methodName = "Bind" and + output.isParameter(0) + or + methodName in [ + "Cookie", "Data", "GetData", "Header", "Param", "Params", "Query", "Refer", "Referer", + "URI", "URL", "UserAgent" + ] and + output.isResult(0) + ) + } + + predicate isSafeUrlSource() { methodName in ["URI", "URL"] } + } + + /** `BeegoInput` sources that are safe to use for redirection. */ + private class BeegoInputSafeUrlSource extends SafeUrlFlow::Source { + BeegoInputSafeUrlSource() { this.(BeegoInputSource).isSafeUrlSource() } + } + + /** + * `beego.Controller` sources of untrusted data. + */ + private class BeegoControllerSource extends UntrustedFlowSource::Range { + string methodName; + FunctionOutput output; + + BeegoControllerSource() { + exists(DataFlow::MethodCallNode c | + c.getTarget().hasQualifiedName(packagePath(), "Controller", methodName) + | + this = output.getExitNode(c) + ) and + ( + methodName = "ParseForm" and + output.isParameter(0) + or + methodName in ["GetFile", "GetFiles", "GetString", "GetStrings", "Input"] and + output.isResult(0) + or + methodName = "GetFile" and + output.isResult(1) + ) + } + } + + /** + * `beego/context.Context` sources of untrusted data. 
+ */ + private class BeegoContextSource extends UntrustedFlowSource::Range { + BeegoContextSource() { + exists(Method m | m.hasQualifiedName(contextPackagePath(), "Context", "GetCookie") | + this = m.getACall().getResult() + ) + } + } + + private class BeegoOutputInstance extends HTTP::ResponseWriter::Range { + SsaWithFields v; + + BeegoOutputInstance() { + this = v.getBaseVariable().getSourceVariable() and + v.getType().(PointerType).getBaseType().hasQualifiedName(contextPackagePath(), "BeegoOutput") + } + + override DataFlow::Node getANode() { result = v.similar().getAUse().getASuccessor*() } + + /** Gets a header object that corresponds to this HTTP response. */ + DataFlow::MethodCallNode getAHeaderObject() { + result.getTarget().getName() = ["ContentType", "Header"] and + this.getANode() = result.getReceiver() + } + } + + private class BeegoHeaderWrite extends HTTP::HeaderWrite::Range, DataFlow::MethodCallNode { + string methodName; + + BeegoHeaderWrite() { + this.getTarget().hasQualifiedName(contextPackagePath(), "BeegoOutput", methodName) and + methodName in ["ContentType", "Header"] + } + + override DataFlow::Node getName() { methodName = "Header" and result = this.getArgument(0) } + + override string getHeaderName() { + result = HTTP::HeaderWrite::Range.super.getHeaderName() + or + methodName = "ContentType" and result = "content-type" + } + + override DataFlow::Node getValue() { + if methodName = "ContentType" + then result = this.getArgument(0) + else result = this.getArgument(1) + } + + override HTTP::ResponseWriter getResponseWriter() { + result.(BeegoOutputInstance).getAHeaderObject() = this + } + } + + private class BeegoResponseBody extends HTTP::ResponseBody::Range { + DataFlow::MethodCallNode call; + string methodName; + + BeegoResponseBody() { + exists(Method m | m.hasQualifiedName(contextPackagePath(), "BeegoOutput", methodName) | + call = m.getACall() and + this = call.getArgument(0) + ) and + methodName in ["Body", "JSON", "JSONP", 
"ServeFormatted", "XML", "YAML"] + } + + override HTTP::ResponseWriter getResponseWriter() { result.getANode() = call.getReceiver() } + + override string getAContentType() { + // Super-method provides content-types for `Body`, which requires us to search + // for `ContentType` and `Header` calls against the same `BeegoOutput` instance + result = super.getAContentType() + or + // Specifically describe methods that set the content-type and body in one operation: + result = "application/json" and methodName = "JSON" + or + result = "application/javascript" and methodName = "JSONP" + or + // Actually ServeFormatted can serve JSON, XML or YAML depending on the incoming + // `Accept` header, but the important bit is this method cannot serve text/html. + result = "application/json" and methodName = "ServeFormatted" + or + result = "text/xml" and methodName = "XML" + or + result = "application/x-yaml" and methodName = "YAML" + } + } + + private class ControllerResponseBody extends HTTP::ResponseBody::Range { + string name; + + ControllerResponseBody() { + exists(Method m | m.hasQualifiedName(packagePath(), "Controller", name) | + name = "CustomAbort" and this = m.getACall().getArgument(1) + or + name = "SetData" and this = m.getACall().getArgument(0) + ) + } + + override HTTP::ResponseWriter getResponseWriter() { none() } + + override string getAContentType() { + // Actually SetData can serve JSON, XML or YAML depending on the incoming + // `Accept` header, but the important bit is this method cannot serve text/html. + result = "application/json" and name = "SetData" + // CustomAbort doesn't specify a content type, so we assume anything could happen. 
+ } + } + + private class ContextResponseBody extends HTTP::ResponseBody::Range { + string name; + + ContextResponseBody() { + exists(Method m | m.hasQualifiedName(contextPackagePath(), "Context", name) | + name = "Abort" and this = m.getACall().getArgument(1) + or + name = "WriteString" and this = m.getACall().getArgument(0) + ) + } + + override HTTP::ResponseWriter getResponseWriter() { none() } + + // Neither method is likely to be used with well-typed data such as JSON output, + // because there are better methods to do this. Assume the Content-Type could + // be anything. + override string getAContentType() { none() } + } + + private string getALogFunctionName() { + result = + [ + "Alert", "Critical", "Debug", "Emergency", "Error", "Info", "Informational", "Notice", + "Trace", "Warn", "Warning" + ] + } + + private class ToplevelBeegoLoggers extends LoggerCall::Range, DataFlow::CallNode { + ToplevelBeegoLoggers() { + this.getTarget().hasQualifiedName([packagePath(), logsPackagePath()], getALogFunctionName()) + } + + override DataFlow::Node getAMessageComponent() { result = this.getAnArgument() } + } + + private class BeegoLoggerMethods extends LoggerCall::Range, DataFlow::MethodCallNode { + BeegoLoggerMethods() { + this.getTarget().hasQualifiedName(logsPackagePath(), "BeeLogger", getALogFunctionName()) + } + + override DataFlow::Node getAMessageComponent() { result = this.getAnArgument() } + } + + private class UtilLoggers extends LoggerCall::Range, DataFlow::CallNode { + UtilLoggers() { this.getTarget().hasQualifiedName(utilsPackagePath(), "Display") } + + override DataFlow::Node getAMessageComponent() { result = this.getAnArgument() } + } + + private class TopLevelTaintPropagators extends TaintTracking::FunctionModel { + string name; + + TopLevelTaintPropagators() { + this.hasQualifiedName(packagePath(), name) and + name in ["HTML2str", "Htmlquote", "Htmlunquote", "MapGet", "ParseForm", "Str2html", "Substr"] + } + + override predicate 
hasTaintFlow(FunctionInput input, FunctionOutput output) { + name in ["HTML2str", "Htmlquote", "Htmlunquote", "MapGet", "Str2html", "Substr"] and + input.isParameter(0) and + output.isResult(0) + or + name = "ParseForm" and + input.isParameter(0) and + output.isParameter(1) + } + } + + private class ContextTaintPropagators extends TaintTracking::FunctionModel { + ContextTaintPropagators() { this.hasQualifiedName(contextPackagePath(), "WriteBody") } + + override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) { + input.isParameter(2) and output.isParameter(1) + } + } + + private class HtmlQuoteSanitizer extends SharedXss::Sanitizer { + HtmlQuoteSanitizer() { + exists(DataFlow::CallNode c | c.getTarget().hasQualifiedName(packagePath(), "Htmlquote") | + this = c.getArgument(0) + ) + } + } + + private class FsOperations extends FileSystemAccess::Range, DataFlow::CallNode { + FsOperations() { + this.getTarget().hasQualifiedName(packagePath(), "Walk") + or + exists(Method m | this = m.getACall() | + m.hasQualifiedName(packagePath(), "FileSystem", "Open") or + m.hasQualifiedName(packagePath(), "Controller", "SaveToFile") + ) + } + + override DataFlow::Node getAPathArgument() { + this.getTarget().getName() = ["Walk", "SaveToFile"] and result = this.getArgument(1) + or + this.getTarget().getName() = "Open" and result = this.getArgument(0) + } + } + + private class RedirectMethods extends HTTP::Redirect::Range, DataFlow::CallNode { + string package; + string className; + + RedirectMethods() { + ( + package = packagePath() and className = "Controller" + or + package = contextPackagePath() and className = "Context" + ) and + this = any(Method m | m.hasQualifiedName(package, className, "Redirect")).getACall() + } + + override DataFlow::Node getUrl() { + className = "Controller" and result = this.getArgument(0) + or + className = "Context" and result = this.getArgument(1) + } + + override HTTP::ResponseWriter getResponseWriter() { none() } + } + + private 
class UtilsTaintPropagators extends TaintTracking::FunctionModel { + string name; + + UtilsTaintPropagators() { + this.hasQualifiedName(utilsPackagePath(), name) and + name in [ + "GetDisplayString", "SliceChunk", "SliceDiff", "SliceFilter", "SliceIntersect", + "SliceMerge", "SlicePad", "SliceRand", "SliceReduce", "SliceShuffle", "SliceUnique" + ] + } + + override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) { + name in [ + "GetDisplayString", "SliceIntersect", "SliceMerge", "SlicePad", "SliceRand", + "SliceShuffle", "SliceUnique" + ] and + input.isParameter(_) and + output.isResult(0) + or + name in ["SliceChunk", "SliceDiff", "SliceFilter", "SliceReduce"] and + input.isParameter(0) and + output.isResult(0) + } + } + + private class BeeMapModels extends TaintTracking::FunctionModel, Method { + string name; + + BeeMapModels() { + this.hasQualifiedName(utilsPackagePath(), "BeeMap", name) and + name in ["Get", "Set", "Items"] + } + + override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) { + name = "Set" and input.isParameter(1) and output.isReceiver() + or + name in ["Get", "Items"] and input.isReceiver() and output.isResult(0) + } + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/BeegoOrm.qll b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/BeegoOrm.qll new file mode 100644 index 00000000000..657f89d59b2 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/BeegoOrm.qll @@ -0,0 +1,101 @@ +/** + * Provides classes for working with untrusted flow sources, sinks and taint propagators + * from the `github.com/astaxie/beego/orm` subpackage. + */ + +import go +private import semmle.go.security.StoredXssCustomizations + +/** + * Provides classes for working with untrusted flow sources, sinks and taint propagators + * from the [Beego ORM](`github.com/astaxie/beego/orm`) subpackage. + */ +module BeegoOrm { + /** Gets the package name `github.com/astaxie/beego/orm`. 
*/ + string packagePath() { result = package("github.com/astaxie/beego", "orm") } + + private class DbSink extends SQL::QueryString::Range { + DbSink() { + exists(Method m, string methodName, int argNum | + m.hasQualifiedName(packagePath(), "DB", methodName) and + methodName in [ + "Exec", "ExecContext", "Prepare", "PrepareContext", "Query", "QueryContext", "QueryRow", + "QueryRowContext" + ] and + if methodName.matches("%Context") then argNum = 1 else argNum = 0 + | + this = m.getACall().getArgument(argNum) + ) + } + } + + private class QueryBuilderSink extends SQL::QueryString::Range { + // Note this class doesn't do any escaping, unlike the true ORM part of the package + QueryBuilderSink() { + exists(Method impl | impl.implements(packagePath(), "QueryBuilder", _) | + this = impl.getACall().getAnArgument() + ) and + this.getType().getUnderlyingType() instanceof StringType + } + } + + private class OrmerRawSink extends SQL::QueryString::Range { + OrmerRawSink() { + exists(Method impl | impl.implements(packagePath(), "Ormer", "Raw") | + this = impl.getACall().getArgument(0) + ) + } + } + + private class QuerySeterFilterRawSink extends SQL::QueryString::Range { + QuerySeterFilterRawSink() { + exists(Method impl | impl.implements(packagePath(), "QuerySeter", "FilterRaw") | + this = impl.getACall().getArgument(1) + ) + } + } + + private class ConditionRawSink extends SQL::QueryString::Range { + ConditionRawSink() { + exists(Method impl | impl.implements(packagePath(), "Condition", "Raw") | + this = impl.getACall().getArgument(1) + ) + } + } + + private class OrmerSource extends StoredXss::Source { + OrmerSource() { + exists(Method impl | + impl.implements(packagePath(), "Ormer", ["Read", "ReadForUpdate", "ReadOrCreate"]) + | + this = FunctionOutput::parameter(0).getExitNode(impl.getACall()) + ) + } + } + + private class StringFieldSource extends StoredXss::Source { + StringFieldSource() { + exists(Method m | + m.hasQualifiedName(packagePath(), ["JSONField", 
"JsonbField", "TextField"], + ["RawValue", "String", "Value"]) + | + this = m.getACall().getResult() + ) + } + } + + private class SeterSource extends StoredXss::Source { + SeterSource() { + exists(Method impl | + // All and One are exclusive to QuerySeter, QueryRow[s] are exclusive to RawSeter, the rest are common. + impl.implements(packagePath(), ["QuerySeter", "RawSeter"], + [ + "All", "One", "Values", "ValuesList", "ValuesFlat", "RowsToMap", "RowsToStruct", + "QueryRow", "QueryRows" + ]) + | + this = FunctionOutput::parameter(0).getExitNode(impl.getACall()) + ) + } + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Chi.qll b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Chi.qll new file mode 100644 index 00000000000..89f1a41d350 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Chi.qll @@ -0,0 +1,29 @@ +/** + * Provides classes for working with untrusted flow sources from the `github.com/go-chi/chi` package. + */ + +import go + +private module Chi { + /** Gets the package name `github.com/go-chi/chi`. */ + string packagePath() { result = package("github.com/go-chi/chi", "") } + + /** + * Functions that extract URL parameters, considered as a source of untrusted flow. + */ + private class UserControlledFunction extends UntrustedFlowSource::Range, DataFlow::CallNode { + UserControlledFunction() { + this.getTarget().hasQualifiedName(packagePath(), ["URLParam", "URLParamFromCtx"]) + } + } + + /** + * Methods that extract URL parameters, considered as a source of untrusted flow. 
+ */ + private class UserControlledRequestMethod extends UntrustedFlowSource::Range, + DataFlow::MethodCallNode { + UserControlledRequestMethod() { + this.getTarget().hasQualifiedName(packagePath(), "Context", "URLParam") + } + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Couchbase.qll b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Couchbase.qll new file mode 100644 index 00000000000..983c445d710 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Couchbase.qll @@ -0,0 +1,98 @@ +/** + * Provides models of commonly used functions in the official Couchbase Go SDK library. + */ + +import go + +/** + * Provides models of commonly used functions in the official Couchbase Go SDK library. + */ +module Couchbase { + /** + * Gets a package path for the official Couchbase Go SDK library. + * + * Note that v1 and v2 have different APIs, but the names are disjoint so there is no need to + * distinguish between them. + */ + string packagePath() { + result = + package([ + "gopkg.in/couchbase/gocb", "github.com/couchbase/gocb", "github.com/couchbaselabs/gocb" + ], "") + } + + /** + * Models of methods on `gocb/AnalyticsQuery` and `gocb/N1qlQuery` which support a fluent + * interface by returning the receiver. They are not inherently relevant to taint. 
+ */ + private class QueryMethodV1 extends TaintTracking::FunctionModel, Method { + QueryMethodV1() { + exists(string queryTypeName, string methodName | + queryTypeName = "AnalyticsQuery" and + methodName in [ + "ContextId", "Deferred", "Pretty", "Priority", "RawParam", "ServerSideTimeout" + ] + or + queryTypeName = "N1qlQuery" and + methodName in [ + "AdHoc", "Consistency", "ConsistentWith", "Custom", "PipelineBatch", "PipelineCap", + "Profile", "ReadOnly", "ScanCap", "Timeout" + ] + | + this.hasQualifiedName(packagePath(), queryTypeName, methodName) + ) + } + + override predicate hasTaintFlow(FunctionInput inp, FunctionOutput outp) { + inp.isReceiver() and outp.isResult() + } + } + + private class QueryFromN1qlStatementV1 extends TaintTracking::FunctionModel { + QueryFromN1qlStatementV1() { + this.hasQualifiedName(packagePath(), ["NewAnalyticsQuery", "NewN1qlQuery"]) + } + + override predicate hasTaintFlow(FunctionInput inp, FunctionOutput outp) { + inp.isParameter(0) and outp.isResult() + } + } + + /** + * A query used in an API function acting on a `Bucket` or `Cluster` struct of v1 of + * the official Couchbase Go library, gocb. 
+ */ + private class CouchbaseV1Query extends NoSQL::Query::Range { + CouchbaseV1Query() { + // func (b *Bucket) ExecuteAnalyticsQuery(q *AnalyticsQuery, params interface{}) (AnalyticsResults, error) + // func (b *Bucket) ExecuteN1qlQuery(q *N1qlQuery, params interface{}) (QueryResults, error) + // func (c *Cluster) ExecuteAnalyticsQuery(q *AnalyticsQuery, params interface{}) (AnalyticsResults, error) + // func (c *Cluster) ExecuteN1qlQuery(q *N1qlQuery, params interface{}) (QueryResults, error) + exists(Method meth, string structName, string methodName | + structName in ["Bucket", "Cluster"] and + methodName in ["ExecuteN1qlQuery", "ExecuteAnalyticsQuery"] and + meth.hasQualifiedName(packagePath(), structName, methodName) and + this = meth.getACall().getArgument(0) + ) + } + } + + /** + * A query used in an API function acting on a `Cluster` or `Scope` struct of v2 of + * the official Couchbase Go library, gocb. + */ + private class CouchbaseV2Query extends NoSQL::Query::Range { + CouchbaseV2Query() { + // func (c *Cluster) AnalyticsQuery(statement string, opts *AnalyticsOptions) (*AnalyticsResult, error) + // func (c *Cluster) Query(statement string, opts *QueryOptions) (*QueryResult, error) + // func (s *Scope) AnalyticsQuery(statement string, opts *AnalyticsOptions) (*AnalyticsResult, error) + // func (s *Scope) Query(statement string, opts *QueryOptions) (*QueryResult, error) + exists(Method meth, string structName, string methodName | + structName in ["Cluster", "Scope"] and + methodName in ["AnalyticsQuery", "Query"] and + meth.hasQualifiedName(packagePath(), structName, methodName) and + this = meth.getACall().getArgument(0) + ) + } + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Echo.qll b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Echo.qll new file mode 100644 index 00000000000..df58b492b87 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Echo.qll @@ -0,0 +1,123 @@ +/** + * Provides classes for working with 
untrusted flow sources, taint propagators, and HTTP sinks + * from the `github.com/labstack/echo` package. + */ + +import go + +private module Echo { + /** Gets the package name `github.com/labstack/echo`. */ + private string packagePath() { result = package("github.com/labstack/echo", "") } + + /** + * Data from a `Context` interface method, considered as a source of untrusted flow. + */ + private class EchoContextSource extends UntrustedFlowSource::Range { + EchoContextSource() { + exists(DataFlow::MethodCallNode call, string methodName | + methodName = + [ + "Param", "ParamValues", "QueryParam", "QueryParams", "QueryString", "FormValue", + "FormParams", "FormFile", "MultipartForm", "Cookie", "Cookies" + ] and + call.getTarget().hasQualifiedName(packagePath(), "Context", methodName) and + this = call.getResult(0) + ) + } + } + + /** + * Data from a `Context` interface method that is not generally exploitable for open-redirect attacks. + */ + private class EchoContextRedirectUnexploitableSource extends HTTP::Redirect::UnexploitableSource { + EchoContextRedirectUnexploitableSource() { + exists(DataFlow::MethodCallNode call, string methodName | + methodName = ["FormValue", "FormParams", "FormFile", "MultipartForm", "Cookie", "Cookies"] and + call.getTarget().hasQualifiedName(packagePath(), "Context", methodName) and + this = call.getResult(0) + ) + } + } + + /** + * Models of `Context.Get/Set`. `Context` behaves like a map, with corresponding taint propagation. 
+ */ + private class ContextMapModels extends TaintTracking::FunctionModel, Method { + string methodName; + FunctionInput input; + FunctionOutput output; + + ContextMapModels() { + ( + methodName = "Get" and input.isReceiver() and output.isResult() + or + methodName = "Set" and input.isParameter(1) and output.isReceiver() + ) and + this.hasQualifiedName(packagePath(), "Context", methodName) + } + + override predicate hasTaintFlow(FunctionInput inp, FunctionOutput outp) { + inp = input and outp = output + } + } + + /** + * A call to a method on `Context` struct that unmarshals data into a target. + */ + private class EchoContextBinder extends UntrustedFlowSource::Range { + EchoContextBinder() { + exists(DataFlow::MethodCallNode call | + call.getTarget().hasQualifiedName(packagePath(), "Context", "Bind") + | + this = FunctionOutput::parameter(0).getExitNode(call) + ) + } + } + + /** + * `echo.Context` methods which set the content-type to `text/html` and write a result in one operation. + */ + private class EchoHtmlOutputs extends HTTP::ResponseBody::Range { + EchoHtmlOutputs() { + exists(Method m | m.hasQualifiedName(packagePath(), "Context", ["HTML", "HTMLBlob"]) | + this = m.getACall().getArgument(1) + ) + } + + override HTTP::ResponseWriter getResponseWriter() { none() } + + override string getAContentType() { result = "text/html" } + } + + /** + * `echo.Context` methods which take a content-type as a parameter. + */ + private class EchoParameterizedOutputs extends HTTP::ResponseBody::Range { + DataFlow::CallNode callNode; + + EchoParameterizedOutputs() { + exists(Method m | m.hasQualifiedName(packagePath(), "Context", ["Blob", "Stream"]) | + callNode = m.getACall() and this = callNode.getArgument(2) + ) + } + + override HTTP::ResponseWriter getResponseWriter() { none() } + + override DataFlow::Node getAContentTypeNode() { result = callNode.getArgument(1) } + } + + /** + * The `echo.Context.Redirect` method. 
+ */ + private class EchoRedirectMethod extends HTTP::Redirect::Range, DataFlow::CallNode { + EchoRedirectMethod() { + exists(Method m | m.hasQualifiedName(packagePath(), "Context", "Redirect") | + this = m.getACall() + ) + } + + override DataFlow::Node getUrl() { result = this.getArgument(1) } + + override HTTP::ResponseWriter getResponseWriter() { none() } + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/ElazarlGoproxy.qll b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/ElazarlGoproxy.qll new file mode 100644 index 00000000000..0ab4e48375b --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/ElazarlGoproxy.qll @@ -0,0 +1,134 @@ +/** + * Provides classes for working with concepts relating to the [github.com/elazarl/goproxy](https://pkg.go.dev/github.com/elazarl/goproxy) package. + */ + +import go + +/** + * Provides classes for working with concepts relating to the [github.com/elazarl/goproxy](https://pkg.go.dev/github.com/elazarl/goproxy) package. + */ +module ElazarlGoproxy { + /** Gets the package name. */ + string packagePath() { result = package("github.com/elazarl/goproxy", "") } + + private class NewResponse extends HTTP::HeaderWrite::Range, DataFlow::CallNode { + NewResponse() { this.getTarget().hasQualifiedName(packagePath(), "NewResponse") } + + override string getHeaderName() { this.definesHeader(result, _) } + + override string getHeaderValue() { this.definesHeader(_, result) } + + override DataFlow::Node getName() { none() } + + override DataFlow::Node getValue() { result = this.getArgument([1, 2]) } + + override predicate definesHeader(string header, string value) { + header = "status" and value = this.getArgument(2).getIntValue().toString() + or + header = "content-type" and value = this.getArgument(1).getStringValue() + } + + override HTTP::ResponseWriter getResponseWriter() { none() } + } + + /** A body argument to a `NewResponse` call. 
*/ + private class NewResponseBody extends HTTP::ResponseBody::Range { + NewResponse call; + + NewResponseBody() { this = call.getArgument(3) } + + override DataFlow::Node getAContentTypeNode() { result = call.getArgument(1) } + + override HTTP::ResponseWriter getResponseWriter() { none() } + } + + private class TextResponse extends HTTP::HeaderWrite::Range, DataFlow::CallNode { + TextResponse() { this.getTarget().hasQualifiedName(packagePath(), "TextResponse") } + + override string getHeaderName() { this.definesHeader(result, _) } + + override string getHeaderValue() { this.definesHeader(_, result) } + + override DataFlow::Node getName() { none() } + + override DataFlow::Node getValue() { none() } + + override predicate definesHeader(string header, string value) { + header = "status" and value = "200" + or + header = "content-type" and value = "text/plain" + } + + override HTTP::ResponseWriter getResponseWriter() { none() } + } + + /** The body argument of a `TextResponse` call, which is always served as `text/plain`. */ + private class TextResponseBody extends HTTP::ResponseBody::Range { + TextResponse call; + + TextResponseBody() { this = call.getArgument(1) } // assumes `TextResponse(r, text)`: the text is argument 1 + + override string getAContentType() { result = "text/plain" } // no content-type argument; matches `TextResponse.definesHeader` + + override HTTP::ResponseWriter getResponseWriter() { none() } + } + + /** A handler attached to a goproxy proxy type. 
*/ + private class ProxyHandler extends HTTP::RequestHandler::Range { + DataFlow::MethodCallNode handlerReg; + + ProxyHandler() { + handlerReg + .getTarget() + .hasQualifiedName(packagePath(), "ReqProxyConds", ["Do", "DoFunc", "HandleConnect"]) and + this = handlerReg.getArgument(0) + } + + override predicate guardedBy(DataFlow::Node check) { + // note OnResponse is not modeled, as that server responses are not currently considered untrusted input + exists(DataFlow::MethodCallNode onreqcall | + onreqcall.getTarget().hasQualifiedName(packagePath(), "ProxyHttpServer", "OnRequest") + | + handlerReg.getReceiver() = onreqcall.getASuccessor*() and + check = onreqcall.getArgument(0) + ) + } + } + + private class UserControlledRequestData extends UntrustedFlowSource::Range { + UserControlledRequestData() { + exists(DataFlow::FieldReadNode frn | this = frn | + // liberally consider ProxyCtx.UserData to be untrusted; it's a data field set by a request handler + frn.getField().hasQualifiedName(packagePath(), "ProxyCtx", "UserData") + ) + or + exists(DataFlow::MethodCallNode call | this = call | + call.getTarget().hasQualifiedName(packagePath(), "ProxyCtx", "Charset") + ) + } + } + + private class ProxyLog extends LoggerCall::Range, DataFlow::MethodCallNode { + ProxyLog() { this.getTarget().hasQualifiedName(packagePath(), "ProxyCtx", ["Logf", "Warnf"]) } + + override DataFlow::Node getAMessageComponent() { result = this.getAnArgument() } + } + + private class MethodModels extends TaintTracking::FunctionModel, Method { + FunctionInput inp; + FunctionOutput outp; + + MethodModels() { + // Methods: + // signature: func CertStorage.Fetch(hostname string, gen func() (*tls.Certificate, error)) (*tls.Certificate, error) + // + // `hostname` excluded because if the cert storage or generator function themselves have not + // been tainted, `hostname` would be unlikely to fetch user-controlled data + this.hasQualifiedName(packagePath(), "CertStorage", "Fetch") and + (inp.isReceiver() or 
inp.isParameter(1)) and + outp.isResult(0) + } + + override predicate hasTaintFlow(FunctionInput i, FunctionOutput o) { i = inp and o = outp } + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Email.qll b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Email.qll new file mode 100644 index 00000000000..049af5f87bf --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Email.qll @@ -0,0 +1,114 @@ +/** Provides classes for working with email-related APIs. */ + +import go + +/** + * A data-flow node that represents data written to an email, either as part + * of the headers or as part of the body. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `EmailData::Range` instead. + */ +class EmailData extends DataFlow::Node { + EmailData::Range self; + + EmailData() { this = self } +} + +/** Provides classes for working with data that is incorporated into an email. */ +module EmailData { + /** + * A data-flow node that represents data which is written to an email, either as part + * of the headers or as part of the body. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `EmailData` instead. + */ + abstract class Range extends DataFlow::Node { } + + /** A data-flow node that is written to an email using the net/smtp package. */ + private class SmtpData extends Range { + SmtpData() { + // func (c *Client) Data() (io.WriteCloser, error) + exists(Method data | + data.hasQualifiedName("net/smtp", "Client", "Data") and + this.(DataFlow::SsaNode).getInit() = data.getACall().getResult(0) + ) + or + // func SendMail(addr string, a Auth, from string, to []string, msg []byte) error + exists(Function sendMail | + sendMail.hasQualifiedName("net/smtp", "SendMail") and + this = sendMail.getACall().getArgument(4) + ) + } + } + + /** Gets the package name `github.com/sendgrid/sendgrid-go/helpers/mail`. 
*/ + private string sendgridMail() { + result = package("github.com/sendgrid/sendgrid-go", "helpers/mail") + } + + private class NewContent extends TaintTracking::FunctionModel { + NewContent() { + // func NewContent(contentType string, value string) *Content + this.hasQualifiedName(sendgridMail(), "NewContent") + } + + override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) { + input.isParameter(1) and output.isResult() + } + } + + /** A data-flow node that is written to an email using the sendgrid/sendgrid-go package. */ + private class SendGridEmail extends Range { + SendGridEmail() { + // func NewSingleEmail(from *Email, subject string, to *Email, plainTextContent string, htmlContent string) *SGMailV3 + exists(Function newSingleEmail | + newSingleEmail.hasQualifiedName(sendgridMail(), "NewSingleEmail") and + this = newSingleEmail.getACall().getArgument([1, 3, 4]) + ) + or + // func NewV3MailInit(from *Email, subject string, to *Email, content ...*Content) *SGMailV3 + exists(Function newv3MailInit | + newv3MailInit.hasQualifiedName(sendgridMail(), "NewV3MailInit") and + this = newv3MailInit.getACall().getArgument(any(int i | i = 1 or i >= 3)) + ) + or + // func (s *SGMailV3) AddContent(c ...*Content) *SGMailV3 + exists(Method addContent | + addContent.hasQualifiedName(sendgridMail(), "SGMailV3", "AddContent") and + this = addContent.getACall().getAnArgument() + ) + } + } +} + +/** + * A taint model of the `Writer.CreatePart` method from `mime/multipart`. + * + * If tainted data is written to the multipart section created by this method, the underlying writer + * should be considered tainted as well. 
+ */ +private class MultipartWriterCreatePartModel extends TaintTracking::FunctionModel, Method { + MultipartWriterCreatePartModel() { + this.hasQualifiedName("mime/multipart", "Writer", "CreatePart") + } + + override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) { + input.isResult(0) and output.isReceiver() + } +} + +/** + * A taint model of the `NewWriter` function from `mime/multipart`. + * + * If tainted data is written to the writer created by this function, the underlying writer + * should be considered tainted as well. + */ +private class MultipartNewWriterModel extends TaintTracking::FunctionModel { + MultipartNewWriterModel() { this.hasQualifiedName("mime/multipart", "NewWriter") } + + override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) { + input.isResult() and output.isParameter(0) + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Encoding.qll b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Encoding.qll new file mode 100644 index 00000000000..a53f36bec48 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Encoding.qll @@ -0,0 +1,28 @@ +/** + * Provides classes modelling taint propagation through marshalling and encoding functions. + */ + +import go + +/** Gets the package name `github.com/json-iterator/go`. */ +private string packagePath() { result = package("github.com/json-iterator/go", "") } + +/** A model of json-iterator's `Unmarshal` function, propagating taint from the JSON input to the decoded object. 
*/ +private class JsonIteratorUnmarshalFunction extends TaintTracking::FunctionModel, + UnmarshalingFunction::Range { + JsonIteratorUnmarshalFunction() { + this.hasQualifiedName(packagePath(), ["Unmarshal", "UnmarshalFromString"]) + or + this.(Method).implements(packagePath(), "API", ["Unmarshal", "UnmarshalFromString"]) + } + + override DataFlow::FunctionInput getAnInput() { result.isParameter(0) } + + override DataFlow::FunctionOutput getOutput() { result.isParameter(1) } + + override string getFormat() { result = "JSON" } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp = getAnInput() and outp = getOutput() + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/EvanphxJsonPatch.qll b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/EvanphxJsonPatch.qll new file mode 100644 index 00000000000..6408b3d1d4c --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/EvanphxJsonPatch.qll @@ -0,0 +1,61 @@ +/** + * Provides classes modeling `github.com/evanphx/json-patch`. + */ + +import go + +private module EvanphxJsonPatch { + /** Gets the package name `github.com/evanphx/json-patch`. 
*/ + private string packagePath() { result = package("github.com/evanphx/json-patch", "") } + + private class MergeMergePatches extends TaintTracking::FunctionModel { + MergeMergePatches() { this.hasQualifiedName(packagePath(), "MergeMergePatches") } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + (inp.isParameter(0) or inp.isParameter(1)) and + outp.isResult(0) + } + } + + private class MergePatch extends TaintTracking::FunctionModel { + MergePatch() { this.hasQualifiedName(packagePath(), "MergePatch") } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + (inp.isParameter(0) or inp.isParameter(1)) and + outp.isResult(0) + } + } + + private class CreateMergePatch extends TaintTracking::FunctionModel { + CreateMergePatch() { this.hasQualifiedName(packagePath(), "CreateMergePatch") } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + (inp.isParameter(0) or inp.isParameter(1)) and + outp.isResult(0) + } + } + + private class DecodePatch extends TaintTracking::FunctionModel { + DecodePatch() { this.hasQualifiedName(packagePath(), "DecodePatch") } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp.isParameter(0) and + outp.isResult(0) + } + } + + private class Apply extends TaintTracking::FunctionModel, Method { + Apply() { + exists(string fn | + fn in ["Apply", "ApplyWithOptions", "ApplyIndent", "ApplyIndentWithOptions"] + | + this.hasQualifiedName(packagePath(), "Patch", fn) + ) + } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + (inp.isParameter(0) or inp.isReceiver()) and + outp.isResult(0) + } + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Gin.qll b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Gin.qll new file mode 100644 index 00000000000..87212868ae3 --- /dev/null +++ 
b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Gin.qll @@ -0,0 +1,72 @@ +/** + * Provides classes for working with untrusted flow sources from the `github.com/gin-gonic/gin` package. + */ + +import go + +private module Gin { + /** Gets the package name `github.com/gin-gonic/gin`. */ + string packagePath() { result = package("github.com/gin-gonic/gin", "") } + + /** + * Data from a `Context` struct, considered as a source of untrusted flow. + */ + private class GithubComGinGonicGinContextSource extends UntrustedFlowSource::Range { + GithubComGinGonicGinContextSource() { + // Method calls: + exists(DataFlow::MethodCallNode call, string methodName | + call.getTarget().hasQualifiedName(packagePath(), "Context", methodName) and + methodName in [ + "FullPath", "GetHeader", "QueryArray", "Query", "PostFormArray", "PostForm", "Param", + "GetStringSlice", "GetString", "GetRawData", "ClientIP", "ContentType", "Cookie", + "GetQueryArray", "GetQuery", "GetPostFormArray", "GetPostForm", "DefaultPostForm", + "DefaultQuery", "GetPostFormMap", "GetQueryMap", "GetStringMap", "GetStringMapString", + "GetStringMapStringSlice", "PostFormMap", "QueryMap" + ] + | + this = call.getResult(0) + ) + or + // Field reads: + exists(DataFlow::Field fld | + fld.hasQualifiedName(packagePath(), "Context", ["Accepted", "Params"]) and + this = fld.getARead() + ) + } + } + + private class ParamsGet extends TaintTracking::FunctionModel, Method { + ParamsGet() { this.hasQualifiedName(packagePath(), "Params", "Get") } + + override predicate hasTaintFlow(FunctionInput inp, FunctionOutput outp) { + inp.isReceiver() and outp.isResult(0) + } + } + + private class ParamsByName extends TaintTracking::FunctionModel, Method { + ParamsByName() { this.hasQualifiedName(packagePath(), "Params", "ByName") } + + override predicate hasTaintFlow(FunctionInput inp, FunctionOutput outp) { + inp.isReceiver() and outp.isResult() + } + } + + /** + * A call to a method on `Context` struct that unmarshals data into a 
target. + */ + private class GithubComGinGonicGinContextBindSource extends UntrustedFlowSource::Range { + GithubComGinGonicGinContextBindSource() { + exists(DataFlow::MethodCallNode call, string methodName | + call.getTarget().hasQualifiedName(packagePath(), "Context", methodName) and + methodName in [ + "BindJSON", "BindYAML", "BindXML", "BindUri", "BindQuery", "BindWith", "BindHeader", + "MustBindWith", "Bind", "ShouldBind", "ShouldBindBodyWith", "ShouldBindJSON", + "ShouldBindQuery", "ShouldBindUri", "ShouldBindHeader", "ShouldBindWith", + "ShouldBindXML", "ShouldBindYAML" + ] + | + this = FunctionOutput::parameter(0).getExitNode(call) + ) + } + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Glog.qll b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Glog.qll new file mode 100644 index 00000000000..a48670a135b --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Glog.qll @@ -0,0 +1,28 @@ +/** + * Provides models of commonly used functions in the `github.com/golang/glog` and `k8s.io/klog` + * packages. + */ + +import go + +/** + * Provides models of commonly used functions in the `github.com/golang/glog` package and its + * forks. 
+ */ +module Glog { + private class GlogCall extends LoggerCall::Range, DataFlow::CallNode { + GlogCall() { + exists(string pkg, Function f, string fn | + pkg = package(["github.com/golang/glog", "gopkg.in/glog", "k8s.io/klog"], "") and + fn.regexpMatch("(Error|Exit|Fatal|Info|Warning)(|f|ln)") and + this = f.getACall() + | + f.hasQualifiedName(pkg, fn) + or + f.(Method).hasQualifiedName(pkg, "Verbose", fn) + ) + } + + override DataFlow::Node getAMessageComponent() { result = this.getAnArgument() } + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/GoKit.qll b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/GoKit.qll new file mode 100644 index 00000000000..c501858997c --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/GoKit.qll @@ -0,0 +1,45 @@ +/** + * Provides classes for working with concepts relating to the [github.com/go-kit/kit](https://pkg.go.dev/github.com/go-kit/kit) package. + * + * Note that these models are not included by default; to include them, add `import semmle.go.frameworks.GoKit` to your query or to + * `Customizations.qll`. + */ + +import go + +/** + * Provides classes for working with concepts relating to the [github.com/go-kit/kit](https://pkg.go.dev/github.com/go-kit/kit) package. + */ +module GoKit { + /** Gets the package name. */ + string packagePath() { result = package("github.com/go-kit/kit", "") } + + /** + * Provides classes for working with concepts relating to the `endpoint` package of the + * [github.com/go-kit/kit](https://pkg.go.dev/github.com/go-kit/kit) package. + */ + module Endpoint { + /** Gets the package name. 
*/ + string endpointPackagePath() { result = package("github.com/go-kit/kit", "endpoint") } + + // gets a function that returns an endpoint + private DataFlow::Node getAnEndpointFactoryResult() { + exists(Function mkFn, FunctionOutput res | + mkFn.getResultType(0).hasQualifiedName(endpointPackagePath(), "Endpoint") and + result = res.getEntryNode(mkFn.getFuncDecl()).getAPredecessor*() + ) + } + + private FuncDef getAnEndpointFunction() { + exists(Function endpointFn | endpointFn.getFuncDecl() = result | + endpointFn.getARead() = getAnEndpointFactoryResult() + ) + or + DataFlow::exprNode(result.(FuncLit)) = getAnEndpointFactoryResult() + } + + private class EndpointRequest extends UntrustedFlowSource::Range { + EndpointRequest() { this = DataFlow::parameterNode(getAnEndpointFunction().getParameter(1)) } + } + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/GoRestfulHttp.qll b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/GoRestfulHttp.qll new file mode 100644 index 00000000000..ddaf4dde544 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/GoRestfulHttp.qll @@ -0,0 +1,46 @@ +/** + * Provides models of the [go-restful library](https://github.com/emicklei/go-restful). + */ + +import go + +/** + * Provides models of the [go-restful library](https://github.com/emicklei/go-restful). + */ +private module GoRestfulHttp { + /** Gets the package name `github.com/emicklei/go-restful`. */ + string packagePath() { result = package("github.com/emicklei/go-restful", "") } + + /** + * A model for methods defined on go-restful's `Request` object that may return user-controlled data. + */ + private class GoRestfulSourceMethod extends Method { + GoRestfulSourceMethod() { + this.hasQualifiedName(packagePath(), "Request", + [ + "QueryParameters", "QueryParameter", "BodyParameter", "HeaderParameter", "PathParameter", + "PathParameters" + ]) + } + } + + /** + * A model of go-restful's `Request` object as a source of user-controlled data. 
+ */ + private class GoRestfulSource extends UntrustedFlowSource::Range { + GoRestfulSource() { this = any(GoRestfulSourceMethod g).getACall() } + } + + /** + * A model of go-restful's `Request.ReadEntity` method as a source of user-controlled data. + */ + private class GoRestfulReadEntitySource extends UntrustedFlowSource::Range { + GoRestfulReadEntitySource() { + exists(DataFlow::MethodCallNode call | + call.getTarget().hasQualifiedName(packagePath(), "Request", "ReadEntity") + | + this = FunctionOutput::parameter(0).getExitNode(call) + ) + } + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/K8sIoApiCoreV1.qll b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/K8sIoApiCoreV1.qll new file mode 100644 index 00000000000..7752a95f400 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/K8sIoApiCoreV1.qll @@ -0,0 +1,61 @@ +/** Provides models of commonly used functions in the `k8s.io/api/core/v1` package. */ + +import go + +/** + * Provides models of commonly used functions in the `k8s.io/api/core/v1` package. + */ +module K8sIoApiCoreV1 { + /** Gets the package name `k8s.io/api/core/v1`. 
*/ + string packagePath() { result = package("k8s.io/api", "core/v1") } + + private class SecretDeepCopy extends TaintTracking::FunctionModel, Method { + string methodName; + FunctionOutput output; + + SecretDeepCopy() { + ( + methodName in ["DeepCopy", "DeepCopyObject"] and output.isResult() + or + methodName = "DeepCopyInto" and output.isParameter(0) + ) and + this.hasQualifiedName(packagePath(), ["Secret", "SecretList"], methodName) + } + + override predicate hasTaintFlow(FunctionInput inp, FunctionOutput outp) { + inp.isReceiver() and outp = output + } + } + + private class SecretMarshal extends TaintTracking::FunctionModel, Method, + MarshalingFunction::Range { + SecretMarshal() { this.hasQualifiedName(packagePath(), ["Secret", "SecretList"], "Marshal") } + + override DataFlow::FunctionInput getAnInput() { result.isReceiver() } + + override DataFlow::FunctionOutput getOutput() { result.isResult(0) } + + override string getFormat() { result = "protobuf" } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp = getAnInput() and outp = getOutput() + } + } + + private class SecretUnmarshal extends TaintTracking::FunctionModel, Method, + UnmarshalingFunction::Range { + SecretUnmarshal() { + this.hasQualifiedName(packagePath(), ["Secret", "SecretList"], "Unmarshal") + } + + override DataFlow::FunctionInput getAnInput() { result.isReceiver() } + + override DataFlow::FunctionOutput getOutput() { result.isParameter(0) } + + override string getFormat() { result = "protobuf" } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp = getAnInput() and outp = getOutput() + } + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/K8sIoApimachineryPkgRuntime.qll b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/K8sIoApimachineryPkgRuntime.qll new file mode 100644 index 00000000000..aecc2166ffe --- /dev/null +++
b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/K8sIoApimachineryPkgRuntime.qll @@ -0,0 +1,424 @@ +/** Provides models of commonly used functions in the `k8s.io/apimachinery/pkg/runtime` package. */ + +import go + +/** + * Provides models of commonly used functions in the `k8s.io/apimachinery/pkg/runtime` package. + */ +module K8sIoApimachineryPkgRuntime { + /** Gets the package name `k8s.io/apimachinery/pkg/runtime`. */ + string packagePath() { result = package("k8s.io/apimachinery", "pkg/runtime") } + + private class ConvertTypeToType extends TaintTracking::FunctionModel { + ConvertTypeToType() { + this.hasQualifiedName(packagePath(), + [ + "Convert_Slice_string_To_Pointer_int64", "Convert_Slice_string_To_int", + "Convert_Slice_string_To_int64", "Convert_Slice_string_To_string", + "Convert_runtime_Object_To_runtime_RawExtension", + "Convert_runtime_RawExtension_To_runtime_Object", "Convert_string_To_Pointer_int64", + "Convert_string_To_int64" + ]) + } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp.isParameter(0) and outp.isParameter(1) + } + } + + private class DecodeInto extends TaintTracking::FunctionModel, UnmarshalingFunction::Range { + DecodeInto() { this.hasQualifiedName(packagePath(), "DecodeInto") } + + override DataFlow::FunctionInput getAnInput() { result.isParameter(1) } + + override DataFlow::FunctionOutput getOutput() { result.isParameter(2) } + + override string getFormat() { + // The format is not fixed. It depends on parameter 1 or 2. 
+ none() + } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp = getAnInput() and outp = getOutput() + } + } + + private class DeepCopyJSON extends TaintTracking::FunctionModel { + DeepCopyJSON() { this.hasQualifiedName(packagePath(), ["DeepCopyJSON", "DeepCopyJSONValue"]) } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp.isParameter(0) and outp.isResult() + } + } + + private class Encode extends TaintTracking::FunctionModel, MarshalingFunction::Range { + Encode() { this.hasQualifiedName(packagePath(), ["Encode", "EncodeOrDie"]) } + + override DataFlow::FunctionInput getAnInput() { result.isParameter(1) } + + override DataFlow::FunctionOutput getOutput() { result.isResult(0) } + + override string getFormat() { + // The format is not fixed. It depends on the receiver. + none() + } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp = getAnInput() and outp = getOutput() + } + } + + private class ReadField extends TaintTracking::FunctionModel { + ReadField() { this.hasQualifiedName(packagePath(), ["Field", "FieldPtr"]) } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp.isParameter(0) and outp.isParameter(2) + } + } + + private class SetField extends TaintTracking::FunctionModel { + SetField() { this.hasQualifiedName(packagePath(), "SetField") } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp.isParameter(0) and outp.isParameter(1) + } + } + + private class CacheableObjectCacheEncode extends TaintTracking::FunctionModel, Method, + MarshalingFunction::Range { + CacheableObjectCacheEncode() { + this.implements(packagePath(), "CacheableObject", "CacheEncode") + } + + override DataFlow::FunctionInput getAnInput() { result.isReceiver() } + + override DataFlow::FunctionOutput getOutput() { 
result.isParameter(2) } + + override string getFormat() { + // The format is not fixed. It depends on the receiver. + none() + } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp = getAnInput() and outp = getOutput() + } + } + + private class CacheableObjectGetObject extends TaintTracking::FunctionModel, Method { + CacheableObjectGetObject() { this.implements(packagePath(), "CacheableObject", "GetObject") } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp.isReceiver() and outp.isResult() + } + } + + private class DecoderDecode extends TaintTracking::FunctionModel, Method, + UnmarshalingFunction::Range { + DecoderDecode() { + this.implements(packagePath(), "Decoder", "Decode") or + this.hasQualifiedName(packagePath(), "WithoutVersionDecoder", "Decode") + } + + override DataFlow::FunctionInput getAnInput() { result.isParameter(0) } + + override DataFlow::FunctionOutput getOutput() { result.isParameter(2) or result.isResult(0) } + + override string getFormat() { + // The format is not fixed. It depends on the receiver. + none() + } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp = getAnInput() and outp = getOutput() + } + } + + private class EncoderEncode extends TaintTracking::FunctionModel, Method, + MarshalingFunction::Range { + EncoderEncode() { + this.implements(packagePath(), "Encoder", "Encode") or + this.hasQualifiedName(packagePath(), "WithVersionEncoder", "Encode") + } + + override DataFlow::FunctionInput getAnInput() { result.isParameter(0) } + + override DataFlow::FunctionOutput getOutput() { result.isParameter(1) } + + override string getFormat() { + // The format is not fixed. It depends on the receiver. 
+ none() + } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp = getAnInput() and outp = getOutput() + } + } + + private class FramerNewFrameReader extends TaintTracking::FunctionModel, Method { + FramerNewFrameReader() { this.implements(packagePath(), "Framer", "NewFrameReader") } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp.isParameter(0) and outp.isResult() + } + } + + private class FramerNewFrameWriter extends TaintTracking::FunctionModel, Method { + FramerNewFrameWriter() { this.implements(packagePath(), "Framer", "NewFrameWriter") } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp.isResult() and outp.isParameter(0) + } + } + + private class ObjectDeepCopyObject extends TaintTracking::FunctionModel, Method { + ObjectDeepCopyObject() { this.implements(packagePath(), "Object", "DeepCopyObject") } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp.isReceiver() and outp.isResult() + } + } + + private class Decode extends TaintTracking::FunctionModel, UnmarshalingFunction::Range { + Decode() { this.hasQualifiedName(packagePath(), "Decode") } + + override DataFlow::FunctionInput getAnInput() { result.isParameter(1) } + + override DataFlow::FunctionOutput getOutput() { result.isResult(0) } + + override string getFormat() { + // The format is not fixed. It depends on the parameter 0.
+ none() + } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp = getAnInput() and outp = getOutput() + } + } + + private class NewEncodable extends TaintTracking::FunctionModel { + NewEncodable() { this.hasQualifiedName(packagePath(), ["NewEncodable", "NewEncodableList"]) } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp.isParameter(1) and outp.isResult() + } + } + + private class UseOrCreateObject extends TaintTracking::FunctionModel { + UseOrCreateObject() { this.hasQualifiedName(packagePath(), "UseOrCreateObject") } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp.isParameter(3) and outp.isResult(0) + } + } + + private class ObjectConvertorConvert extends TaintTracking::FunctionModel, Method { + ObjectConvertorConvert() { this.implements(packagePath(), "ObjectConvertor", "Convert") } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp.isParameter(0) and outp.isParameter(1) + } + } + + private class ObjectConvertorConvertToVersion extends TaintTracking::FunctionModel, Method { + ObjectConvertorConvertToVersion() { + this.implements(packagePath(), "ObjectConvertor", "ConvertToVersion") + } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp.isParameter(0) and outp.isResult(0) + } + } + + private class ObjectVersionerConvertToVersion extends TaintTracking::FunctionModel, Method { + ObjectVersionerConvertToVersion() { + this.implements(packagePath(), "ObjectVersioner", "ConvertToVersion") + } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp.isParameter(0) and outp.isResult(0) + } + } + + private class ParameterCodecDecodeParameters extends TaintTracking::FunctionModel, Method, + UnmarshalingFunction::Range { + ParameterCodecDecodeParameters() { + 
this.implements(packagePath(), "ParameterCodec", "DecodeParameters") + } + + override DataFlow::FunctionInput getAnInput() { result.isParameter(0) } + + override DataFlow::FunctionOutput getOutput() { result.isParameter(2) } + + override string getFormat() { + // The format is not fixed. It depends on parameter 1. + none() + } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp = getAnInput() and outp = getOutput() + } + } + + private class ParameterCodecEncodeParameters extends TaintTracking::FunctionModel, Method, + MarshalingFunction::Range { + ParameterCodecEncodeParameters() { + this.implements(packagePath(), "ParameterCodec", "EncodeParameters") + } + + override DataFlow::FunctionInput getAnInput() { result.isParameter(0) } + + override DataFlow::FunctionOutput getOutput() { result.isResult(0) } + + override string getFormat() { + // The format is not fixed. It depends on parameter 1. + none() + } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp = getAnInput() and outp = getOutput() + } + } + + private class ProtobufMarshallerMarshalTo extends TaintTracking::FunctionModel, Method, + MarshalingFunction::Range { + ProtobufMarshallerMarshalTo() { + this.implements(packagePath(), "ProtobufMarshaller", "MarshalTo") or + this.implements(packagePath(), "ProtobufReverseMarshaller", "MarshalToSizedBuffer") + } + + override DataFlow::FunctionInput getAnInput() { result.isReceiver() } + + override DataFlow::FunctionOutput getOutput() { result.isParameter(0) } + + override string getFormat() { result = "protobuf" } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp = getAnInput() and outp = getOutput() + } + } + + private class RawExtensionDeepCopy extends TaintTracking::FunctionModel, Method { + RawExtensionDeepCopy() { this.hasQualifiedName(packagePath(), "RawExtension", "DeepCopy") } + + override predicate 
hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp.isReceiver() and outp.isResult() + } + } + + private class RawExtensionDeepCopyInto extends TaintTracking::FunctionModel, Method { + RawExtensionDeepCopyInto() { + this.hasQualifiedName(packagePath(), "RawExtension", "DeepCopyInto") + } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp.isReceiver() and outp.isParameter(0) + } + } + + private class RawExtensionMarshal extends TaintTracking::FunctionModel, Method, + MarshalingFunction::Range { + RawExtensionMarshal() { this.hasQualifiedName(packagePath(), "RawExtension", "Marshal") } + + override DataFlow::FunctionInput getAnInput() { result.isReceiver() } + + override DataFlow::FunctionOutput getOutput() { result.isResult(0) } + + override string getFormat() { result = "protobuf" } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp = getAnInput() and outp = getOutput() + } + } + + private class RawExtensionUnmarshal extends TaintTracking::FunctionModel, Method, + UnmarshalingFunction::Range { + RawExtensionUnmarshal() { this.hasQualifiedName(packagePath(), "RawExtension", "Unmarshal") } + + override DataFlow::FunctionInput getAnInput() { result.isReceiver() } + + override DataFlow::FunctionOutput getOutput() { result.isParameter(0) } + + override string getFormat() { result = "protobuf" } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp = getAnInput() and outp = getOutput() + } + } + + private class UnknownDeepCopy extends TaintTracking::FunctionModel, Method { + UnknownDeepCopy() { + this.hasQualifiedName(packagePath(), "Unknown", ["DeepCopy", "DeepCopyObject"]) + } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp.isReceiver() and outp.isResult() + } + } + + private class UnknownDeepCopyInto extends 
TaintTracking::FunctionModel, Method { + UnknownDeepCopyInto() { this.hasQualifiedName(packagePath(), "Unknown", "DeepCopyInto") } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp.isReceiver() and outp.isParameter(0) + } + } + + private class UnknownMarshal extends TaintTracking::FunctionModel, Method, + MarshalingFunction::Range { + string methodName; + + UnknownMarshal() { + methodName in ["Marshal", "NestedMarshalTo"] and + this.hasQualifiedName(packagePath(), "Unknown", methodName) + } + + override DataFlow::FunctionInput getAnInput() { result.isReceiver() } + + override DataFlow::FunctionOutput getOutput() { + methodName = "Marshal" and result.isResult(0) + or + methodName = "NestedMarshalTo" and result.isParameter(0) + } + + override string getFormat() { result = "protobuf" } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp = getAnInput() and outp = getOutput() + } + } + + private class UnknownUnmarshal extends TaintTracking::FunctionModel, Method, + UnmarshalingFunction::Range { + UnknownUnmarshal() { this.hasQualifiedName(packagePath(), "Unknown", "Unmarshal") } + + override DataFlow::FunctionInput getAnInput() { result.isReceiver() } + + override DataFlow::FunctionOutput getOutput() { result.isParameter(0) } + + override string getFormat() { result = "protobuf" } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp = getAnInput() and outp = getOutput() + } + } + + private class UnstructuredUnstructuredContent extends TaintTracking::FunctionModel, Method { + UnstructuredUnstructuredContent() { + this.implements(packagePath(), "Unstructured", "UnstructuredContent") + } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp.isReceiver() and outp.isResult() + } + } + + private class UnstructuredSetUnstructuredContent extends TaintTracking::FunctionModel, 
Method { + UnstructuredSetUnstructuredContent() { + this.implements(packagePath(), "Unstructured", "SetUnstructuredContent") + } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp.isParameter(0) and outp.isReceiver() + } + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/K8sIoClientGo.qll b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/K8sIoClientGo.qll new file mode 100644 index 00000000000..c087cc26ff8 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/K8sIoClientGo.qll @@ -0,0 +1,28 @@ +/** Provides models of commonly used functions in the `k8s.io/client-go/kubernetes/typed/core/v1` package. */ + +import go + +/** + * Provides models of commonly used functions in the `k8s.io/client-go/kubernetes/typed/core/v1` + * package. + */ +module K8sIoClientGo { + /** Gets the package name `k8s.io/client-go/kubernetes/typed/core/v1`. */ + string packagePath() { result = package("k8s.io/client-go", "kubernetes/typed/core/v1") } + + /** + * A model of `SecretInterface` methods that are sources of secret data. + */ + private class SecretInterfaceSourceMethod extends Method { + SecretInterfaceSourceMethod() { + this.implements(packagePath(), "SecretInterface", ["Get", "List", "Patch"]) + } + } + + /** + * A model of `SecretInterface` as a source of secret data. + */ + class SecretInterfaceSource extends DataFlow::Node { + SecretInterfaceSource() { this = any(SecretInterfaceSourceMethod g).getACall().getResult(0) } + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Logrus.qll b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Logrus.qll new file mode 100644 index 00000000000..1106cb57b06 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Logrus.qll @@ -0,0 +1,32 @@ +/** Provides models of commonly used functions in the `github.com/sirupsen/logrus` package. 
*/ + +import go + +/** Provides models of commonly used functions in the `github.com/sirupsen/logrus` package. */ +module Logrus { + /** Gets the package name `github.com/sirupsen/logrus`. */ + string packagePath() { + result = package(["github.com/sirupsen/logrus", "github.com/Sirupsen/logrus"], "") + } + + bindingset[result] + private string getALogResultName() { + result.matches(["Debug%", "Error%", "Fatal%", "Info%", "Panic%", "Print%", "Trace%", "Warn%"]) + } + + bindingset[result] + private string getAnEntryUpdatingMethodName() { + result.regexpMatch("With(Context|Error|Fields?|Time)") + } + + private class LogCall extends LoggerCall::Range, DataFlow::CallNode { + LogCall() { + exists(string name | name = getALogResultName() or name = getAnEntryUpdatingMethodName() | + this.getTarget().hasQualifiedName(packagePath(), name) or + this.getTarget().(Method).hasQualifiedName(packagePath(), "Entry", name) + ) + } + + override DataFlow::Node getAMessageComponent() { result = this.getAnArgument() } + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Macaron.qll b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Macaron.qll new file mode 100644 index 00000000000..a38b2b20da0 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Macaron.qll @@ -0,0 +1,30 @@ +/** + * Provides classes for working with concepts relating to the Macaron web framework + */ + +import go + +private module Macaron { + private class Context extends HTTP::ResponseWriter::Range { + SsaWithFields v; + + Context() { + this = v.getBaseVariable().getSourceVariable() and + exists(Method m | m.hasQualifiedName("gopkg.in/macaron.v1", "Context", "Redirect") | + v.getType().getMethod("Redirect") = m + ) + } + + override DataFlow::Node getANode() { result = v.similar().getAUse().getASuccessor*() } + } + + private class RedirectCall extends HTTP::Redirect::Range, DataFlow::MethodCallNode { + RedirectCall() { + this.getTarget().hasQualifiedName("gopkg.in/macaron.v1", "Context", 
"Redirect") + } + + override DataFlow::Node getUrl() { result = this.getArgument(0) } + + override HTTP::ResponseWriter getResponseWriter() { result.getANode() = this.getReceiver() } + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Mux.qll b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Mux.qll new file mode 100644 index 00000000000..bca64c17cf6 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Mux.qll @@ -0,0 +1,17 @@ +/** + * Provides classes for working with concepts in the Mux HTTP middleware library. + */ + +import go + +/** + * Provides classes for working with concepts in the Mux HTTP middleware library. + */ +module Mux { + /** An access to a Mux middleware variable. */ + class RequestVars extends DataFlow::UntrustedFlowSource::Range, DataFlow::CallNode { + RequestVars() { + this.getTarget().hasQualifiedName(package("github.com/gorilla/mux", ""), "Vars") + } + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/NoSQL.qll b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/NoSQL.qll new file mode 100644 index 00000000000..9f9ca609084 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/NoSQL.qll @@ -0,0 +1,121 @@ +/** + * Provides classes for working with NoSQL-related concepts such as queries. + */ + +import go + +/** Provides classes for working with NoSQL-related APIs. */ +module NoSQL { + /** + * A data-flow node whose value is interpreted as (part of) a NoSQL query. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `NoSQL::Query::Range` instead. + */ + class Query extends DataFlow::Node { + Query::Range self; + + Query() { this = self } + } + + /** Provides classes for working with NoSQL queries. */ + module Query { + /** + * A data-flow node whose value is interpreted as (part of) a NoSQL query. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `NoSQL::Query` instead. 
+ */ + abstract class Range extends DataFlow::Node { } + + /** + * Holds if method `name` of struct `Collection` from package + * [go.mongodb.org/mongo-driver/mongo](https://pkg.go.dev/go.mongodb.org/mongo-driver/mongo) + * interprets parameter `n` as a query. + */ + private predicate mongoDbCollectionMethod(string name, int n) { + // func (coll *Collection) CountDocuments(ctx context.Context, filter interface{}, + // opts ...*options.CountOptions) (int64, error) + name = "CountDocuments" and n = 1 + or + // func (coll *Collection) DeleteMany(ctx context.Context, filter interface{}, + // opts ...*options.DeleteOptions) (*DeleteResult, error) + name = "DeleteMany" and n = 1 + or + // func (coll *Collection) DeleteOne(ctx context.Context, filter interface{}, + // opts ...*options.DeleteOptions) (*DeleteResult, error) + name = "DeleteOne" and n = 1 + or + // func (coll *Collection) Distinct(ctx context.Context, fieldName string, filter interface{}, + // ...) ([]interface{}, error) + name = "Distinct" and n = 2 + or + // func (coll *Collection) Find(ctx context.Context, filter interface{}, + // opts ...*options.FindOptions) (*Cursor, error) + name = "Find" and n = 1 + or + // func (coll *Collection) FindOne(ctx context.Context, filter interface{}, + // opts ...*options.FindOneOptions) *SingleResult + name = "FindOne" and n = 1 + or + // func (coll *Collection) FindOneAndDelete(ctx context.Context, filter interface{}, ...) + // *SingleResult + name = "FindOneAndDelete" and n = 1 + or + // func (coll *Collection) FindOneAndReplace(ctx context.Context, filter interface{}, + // replacement interface{}, ...) *SingleResult + name = "FindOneAndReplace" and n = 1 + or + // func (coll *Collection) FindOneAndUpdate(ctx context.Context, filter interface{}, + // update interface{}, ...) *SingleResult + name = "FindOneAndUpdate" and n = 1 + or + // func (coll *Collection) ReplaceOne(ctx context.Context, filter interface{}, + // replacement interface{}, ...) 
(*UpdateResult, error) + name = "ReplaceOne" and n = 1 + or + // func (coll *Collection) UpdateMany(ctx context.Context, filter interface{}, + // update interface{}, ...) (*UpdateResult, error) + name = "UpdateMany" and n = 1 + or + // func (coll *Collection) UpdateOne(ctx context.Context, filter interface{}, + // update interface{}, ...) (*UpdateResult, error) + name = "UpdateOne" and n = 1 + or + // func (coll *Collection) Watch(ctx context.Context, pipeline interface{}, ...) + // (*ChangeStream, error) + name = "Watch" and n = 1 + or + // func (coll *Collection) Aggregate(ctx context.Context, pipeline interface{}, + // opts ...*options.AggregateOptions) (*Cursor, error) + name = "Aggregate" and n = 1 + } + + /** + * A query used in an API function acting on a `Collection` struct of package + * [go.mongodb.org/mongo-driver/mongo](https://pkg.go.dev/go.mongodb.org/mongo-driver/mongo). + */ + private class MongoDbCollectionQuery extends Range { + MongoDbCollectionQuery() { + exists(Method meth, string methodName, int n | + mongoDbCollectionMethod(methodName, n) and + meth.hasQualifiedName(package("go.mongodb.org/mongo-driver", "mongo"), "Collection", + methodName) and + this = meth.getACall().getArgument(n) + ) + } + } + } + + /** + * Holds if taint flows from `pred` to `succ` through a MongoDB-specific API. 
+ */ + predicate isAdditionalMongoTaintStep(DataFlow::Node pred, DataFlow::Node succ) { + // Taint an entry if the `Value` is tainted + exists(Write w, DataFlow::Node base, Field f | w.writesField(base, f, pred) | + base = succ.(DataFlow::PostUpdateNode).getPreUpdateNode() and + base.getType().hasQualifiedName(package("go.mongodb.org/mongo-driver", "bson/primitive"), "E") and + f.getName() = "Value" + ) + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Protobuf.qll b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Protobuf.qll new file mode 100644 index 00000000000..9603b015473 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Protobuf.qll @@ -0,0 +1,178 @@ +/** Provides models of commonly used functions and types in the protobuf packages. */ + +import go + +/** Provides models of commonly used functions and types in the protobuf packages. */ +module Protobuf { + /** Gets the name of the modern protobuf top-level implementation package. */ + string modernProtobufPackage() { result = package("google.golang.org/protobuf", "proto") } + + /** Gets the name of the modern protobuf implementation's `protoiface` subpackage. */ + string protobufIfacePackage() { + result = package("google.golang.org/protobuf", "runtime/protoiface") + } + + /** Gets the name of the modern protobuf implementation's `protoreflect` subpackage. */ + string protobufReflectPackage() { + result = package("google.golang.org/protobuf", "reflect/protoreflect") + } + + /** Gets the name of a top-level protobuf implementation package. */ + string protobufPackages() { + result in [package("github.com/golang/protobuf", "proto"), modernProtobufPackage()] + } + + /** The `Marshal` and `MarshalAppend` functions in the protobuf packages. 
*/ + private class MarshalFunction extends TaintTracking::FunctionModel, MarshalingFunction::Range { + string name; + + MarshalFunction() { + name = ["Marshal", "MarshalAppend"] and + ( + this.hasQualifiedName(protobufPackages(), name) or + this.(Method).hasQualifiedName(modernProtobufPackage(), "MarshalOptions", name) + ) + } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp = getAnInput() and outp = getOutput() + } + + override DataFlow::FunctionInput getAnInput() { + if name = "MarshalAppend" then result.isParameter(1) else result.isParameter(0) + } + + override DataFlow::FunctionOutput getOutput() { + name = "MarshalAppend" and result.isParameter(0) + or + result.isResult(0) + } + + override string getFormat() { result = "protobuf" } + } + + private Field inputMessageField() { + result.hasQualifiedName(protobufIfacePackage(), "MarshalInput", "Message") + } + + private Method marshalStateMethod() { + result.hasQualifiedName(protobufIfacePackage(), "MarshalOptions", "MarshalState") + } + + /** + * Additional taint-flow step modelling flow from `MarshalInput.Message` to `MarshalOutput`, + * mediated by a `MarshalOptions.MarshalState` call. + * + * Note we can taint the whole `MarshalOutput` as it only has one field (`Buf`), and taint- + * tracking always considers a field of a tainted struct to itself be tainted. 
+ */ + private class MarshalStateStep extends TaintTracking::AdditionalTaintStep { + override predicate step(DataFlow::Node pred, DataFlow::Node succ) { + exists(DataFlow::PostUpdateNode marshalInput, DataFlow::CallNode marshalStateCall | + marshalStateCall = marshalStateMethod().getACall() and + // pred -> marshalInput.Message + any(DataFlow::Write w) + .writesField(marshalInput.getPreUpdateNode(), inputMessageField(), pred) and + // marshalInput -> marshalStateCall + marshalStateCall.getArgument(0) = globalValueNumber(marshalInput).getANode() and + // marshalStateCall -> succ + marshalStateCall.getResult() = succ + ) + } + } + + /** The `Unmarshal` function in the protobuf packages. */ + class UnmarshalFunction extends TaintTracking::FunctionModel, UnmarshalingFunction::Range { + UnmarshalFunction() { + this.hasQualifiedName(protobufPackages(), "Unmarshal") or + this.(Method).hasQualifiedName(modernProtobufPackage(), "UnmarshalOptions", "Unmarshal") + } + + override predicate hasTaintFlow(DataFlow::FunctionInput inp, DataFlow::FunctionOutput outp) { + inp = getAnInput() and outp = getOutput() + } + + override DataFlow::FunctionInput getAnInput() { result.isParameter(0) } + + override DataFlow::FunctionOutput getOutput() { result.isParameter(1) } + + override string getFormat() { result = "protobuf" } + } + + /** The `Merge` function in the protobuf packages. */ + private class MergeFunction extends TaintTracking::FunctionModel { + MergeFunction() { this.hasQualifiedName(protobufPackages(), "Merge") } + + override predicate hasTaintFlow(FunctionInput inp, FunctionOutput outp) { + inp.isParameter(1) and outp.isParameter(0) + } + } + + /** A protobuf `Message` type. */ + class MessageType extends Type { + MessageType() { this.implements(protobufReflectPackage(), "ProtoMessage") } + } + + /** The `Clone` function in the protobuf packages. 
*/ + private class MessageCloneFunction extends TaintTracking::FunctionModel { + MessageCloneFunction() { this.hasQualifiedName(protobufPackages(), "Clone") } + + override predicate hasTaintFlow(FunctionInput inp, FunctionOutput outp) { + inp.isParameter(0) and outp.isResult() + } + } + + /** A method of a protobuf `Message` type whose name starts with `Get`; data flows from the receiver to the result. */ + private class GetMethod extends DataFlow::FunctionModel, Method { + GetMethod() { + exists(string name | name.matches("Get%") | this = any(MessageType msg).getMethod(name)) + } + + override predicate hasDataFlow(FunctionInput inp, FunctionOutput outp) { + inp.isReceiver() and outp.isResult() + } + } + + /** A `ProtoReflect` method of a protobuf `Message` type; data flows from the receiver to the result. */ + private class ProtoReflectMethod extends DataFlow::FunctionModel, Method { + ProtoReflectMethod() { this = any(MessageType msg).getMethod("ProtoReflect") } + + override predicate hasDataFlow(FunctionInput inp, FunctionOutput outp) { + inp.isReceiver() and outp.isResult() + } + } + + /** + * Gets the base of `node`, or the operand of the base if the base is a pointer dereference. + */ + private DataFlow::Node getBaseLookingThroughDerefs(DataFlow::ComponentReadNode node) { + result = node.getBase().(DataFlow::PointerDereferenceNode).getOperand() + or + result = node.getBase() and not node.getBase() instanceof DataFlow::PointerDereferenceNode + } + + /** + * Gets the data-flow node representing the bottom of a stack of zero or more `ComponentReadNode`s, + * perhaps with interleaved dereferences. + * + * For example, in the expression `a.b[c].d[e]`, this would return the data-flow node for the read from `a`. + */ + private DataFlow::Node getUnderlyingNode(DataFlow::ReadNode read) { + (result = read or result = getBaseLookingThroughDerefs+(read)) and + not result instanceof DataFlow::ComponentReadNode + } + + /** + * Additional taint step tainting a Message when taint is written to any of its fields and/or elements. 
+ */ + private class WriteMessageFieldStep extends TaintTracking::AdditionalTaintStep { + override predicate step(DataFlow::Node pred, DataFlow::Node succ) { + [succ.getType(), succ.getType().getPointerType()] instanceof MessageType and + exists(DataFlow::ReadNode base | + succ.(DataFlow::PostUpdateNode).getPreUpdateNode() = getUnderlyingNode(base) + | + any(DataFlow::Write w).writesComponent(base, pred) + ) + } + } +} diff --git a/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Revel.qll b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Revel.qll new file mode 100644 index 00000000000..7a3733c2da3 --- /dev/null +++ b/repo-tests/codeql-go/ql/lib/semmle/go/frameworks/Revel.qll @@ -0,0 +1,334 @@ +/** + * Provides classes for working with untrusted flow sources from the `github.com/revel/revel` package. + */ + +import go +private import semmle.go.security.OpenUrlRedirectCustomizations + +/** Provides classes and methods modelling the Revel web framework. */ +module Revel { + /** Gets a package path of the Revel framework (`github.com/revel/revel` or `github.com/robfig/revel`). 
*/ + string packagePath() { result = package(["github.com/revel", "github.com/robfig"], "revel") } + + private class ControllerParams extends UntrustedFlowSource::Range, DataFlow::FieldReadNode { + ControllerParams() { + exists(Field f | + this.readsField(_, f) and + f.hasQualifiedName(packagePath(), "Controller", "Params") + ) + } + } + + private class ParamsFixedSanitizer extends TaintTracking::DefaultTaintSanitizer, + DataFlow::FieldReadNode { + ParamsFixedSanitizer() { + exists(Field f | + this.readsField(_, f) and + f.hasQualifiedName(packagePath(), "Params", "Fixed") + ) + } + } + + private class ParamsBind extends TaintTracking::FunctionModel, Method { + ParamsBind() { this.hasQualifiedName(packagePath(), "Params", ["Bind", "BindJSON"]) } + + override predicate hasTaintFlow(FunctionInput inp, FunctionOutput outp) { + inp.isReceiver() and outp.isParameter(0) + } + } + + private class RouteMatchParams extends UntrustedFlowSource::Range, DataFlow::FieldReadNode { + RouteMatchParams() { + exists(Field f | + this.readsField(_, f) and + f.hasQualifiedName(packagePath(), "RouteMatch", "Params") + ) + } + } + + /** A read of a field of a Revel `Request` whose value may be controlled by an untrusted user. 
*/ + private class UserControlledRequestField extends UntrustedFlowSource::Range, + DataFlow::FieldReadNode { + UserControlledRequestField() { + exists(string fieldName | + this.getField().hasQualifiedName(packagePath(), "Request", fieldName) + | + fieldName in [ + "Header", "ContentType", "AcceptLanguages", "Locale", "URL", "Form", "MultipartForm" + ] + ) + } + } + + private class UserControlledRequestMethod extends UntrustedFlowSource::Range, + DataFlow::MethodCallNode { + UserControlledRequestMethod() { + this.getTarget() + .hasQualifiedName(packagePath(), "Request", + [ + "FormValue", "PostFormValue", "GetQuery", "GetForm", "GetMultipartForm", "GetBody", + "Cookie", "GetHttpHeader", "GetRequestURI", "MultipartReader", "Referer", "UserAgent" + ]) + } + } + + private class ServerCookieGetValue extends TaintTracking::FunctionModel, Method { + ServerCookieGetValue() { this.implements(packagePath(), "ServerCookie", "GetValue") } + + override predicate hasTaintFlow(FunctionInput inp, FunctionOutput outp) { + inp.isReceiver() and outp.isResult() + } + } + + private class ServerMultipartFormGetFiles extends TaintTracking::FunctionModel, Method { + ServerMultipartFormGetFiles() { + this.implements(packagePath(), "ServerMultipartForm", ["GetFiles", "GetValues"]) + } + + override predicate hasTaintFlow(FunctionInput inp, FunctionOutput outp) { + inp.isReceiver() and outp.isResult() + } + } + + private string contentTypeFromFilename(DataFlow::Node filename) { + if filename.getStringValue().toLowerCase().matches(["%.htm", "%.html"]) + then result = "text/html" + else result = "application/octet-stream" + // Actually Revel can figure out a variety of other content-types, but none of our analyses care to + // distinguish ones other than text/html. + } + + /** + * `revel.Controller` methods which set the response content-type and designate a result in one operation. 
+ * + * Note these don't actually generate the response; they return a struct which is then returned by the controller + * method, but it is very likely if a string is being rendered that it will end up sent to the user. + * + * The `Render` and `RenderTemplate` methods are handled by `TemplateRender` below. + * + * The `RenderError` method can actually return HTML content, but again only via an HTML template if one exists; + * we assume it falls back to return plain text as this implies there is probably not an injection opportunity + * but there is an information leakage issue. + * + * The `RenderBinary` method can also return a variety of content-types based on the file extension passed. + * We look particularly for HTML file extensions, since these are the only ones we currently have special rules + * for (in particular, detecting XSS vulnerabilities). + */ + private class ControllerRenderMethods extends HTTP::ResponseBody::Range { + string contentType; + + ControllerRenderMethods() { + exists(Method m, string methodName, DataFlow::CallNode methodCall | + m.hasQualifiedName(packagePath(), "Controller", methodName) and + methodCall = m.getACall() + | + exists(int exposedArgument | + this = methodCall.getArgument(exposedArgument) and + ( + methodName = "RenderBinary" and + contentType = contentTypeFromFilename(methodCall.getArgument(1)) and + exposedArgument = 0 + or + methodName = "RenderError" and contentType = "text/plain" and exposedArgument = 0 + or + methodName = "RenderHTML" and contentType = "text/html" and exposedArgument = 0 + or + methodName = "RenderJSON" and contentType = "application/json" and exposedArgument = 0 + or + methodName = "RenderJSONP" and + contentType = "application/javascript" and + exposedArgument = 1 + or + methodName = "RenderXML" and contentType = "text/xml" and exposedArgument = 0 + ) + ) + or + methodName = "RenderText" and + contentType = "text/plain" and + this = methodCall.getAnArgument() + ) + } + + override 
HTTP::ResponseWriter getResponseWriter() { none() } + + override string getAContentType() { result = contentType } + } + + /** + * A call to the `revel.Controller.RenderFileName` method, which instructs Revel to open a file and return its contents. + * We extend `FileSystemAccess` rather than `HTTP::ResponseBody` as this will usually mean exposing a user-controlled + * file rather than the actual contents being user-controlled. + */ + private class RenderFileNameCall extends FileSystemAccess::Range, DataFlow::CallNode { + RenderFileNameCall() { + this = + any(Method m | m.hasQualifiedName(packagePath(), "Controller", "RenderFileName")).getACall() + } + + override DataFlow::Node getAPathArgument() { result = getArgument(0) } + } + + /** + * A call to the `revel.Controller.Redirect` method. + * + * It is currently assumed that a tainted `value` in `Redirect(url, value)`, which calls `Sprintf(url, value)` + * internally, cannot lead to an open redirect vulnerability. + */ + private class ControllerRedirectMethod extends HTTP::Redirect::Range, DataFlow::CallNode { + ControllerRedirectMethod() { + exists(Method m | m.hasQualifiedName(packagePath(), "Controller", "Redirect") | + this = m.getACall() + ) + } + + override DataFlow::Node getUrl() { result = this.getArgument(0) } + + override HTTP::ResponseWriter getResponseWriter() { none() } + } + + /** + * The getter and setter methods of `revel.RevelHeader`. + * + * Note we currently don't implement `HeaderWrite` and related concepts, as they are currently only used + * to track content-type, and directly setting headers does not seem to be the usual way to set the response + * content-type for this framework. If and when the `HeaderWrite` concept has a more abstract idea of the + * relationship between header-writes and HTTP responses than looking for a particular `http.ResponseWriter` + * instance connecting the two, then we may implement it here for completeness. 
+ */ + private class RevelHeaderMethods extends TaintTracking::FunctionModel { + FunctionInput input; + FunctionOutput output; + string name; + + RevelHeaderMethods() { + this.(Method).hasQualifiedName(packagePath(), "RevelHeader", name) and + ( + name = ["Add", "Set"] and input.isParameter([0, 1]) and output.isReceiver() + or + name = ["Get", "GetAll"] and input.isReceiver() and output.isResult() + or + name = "SetCookie" and input.isParameter(0) and output.isReceiver() + ) + } + + override predicate hasTaintFlow(FunctionInput inp, FunctionOutput outp) { + inp = input and outp = output + } + } + + /** + * A read in a Revel template that uses Revel's `raw` function. + */ + class RawTemplateRead extends HtmlTemplate::TemplateRead { + RawTemplateRead() { parent.getBody().regexpMatch("(?s)raw\\s.*") } + } + + /** + * A write to a template argument field that is read raw inside of a template. + */ + private class RawTemplateArgument extends HTTP::TemplateResponseBody::Range { + RawTemplateRead read; + + RawTemplateArgument() { + exists(TemplateRender render, VariableWithFields var | + render.getRenderedFile() = read.getFile() and + // if var is a.b.c, any rhs of a write to a, a.b, or a.b.c + this = var.getParent*().getAWrite().getRhs() + | + var.getParent*() = render.getArgumentVariable() and + ( + var = read.getReadVariable(render.getArgumentVariable()) + or + // if no write or use of that variable exists, no VariableWithFields will be generated + // so we try to find a parent VariableWithFields + // this isn't covered by the 'getParent*' above because no match would be found at all + // for var + not exists(read.getReadVariable(render.getArgumentVariable())) and + exists(string fieldName | fieldName = read.getFieldName() | + var.getQualifiedName() = + render.getArgumentVariable().getQualifiedName() + + ["." 
+ fieldName.substring(0, fieldName.indexOf(".")), ""] + ) + ) + or + // a Revel controller.Render(arg) call will set controller.ViewArgs["arg"] = arg + exists(Variable arg | arg.getARead() = render.(ControllerRender).getAnArgument() | + var.getBaseVariable() = arg and + var.getQualifiedName() = read.getFieldName() + ) + ) + } + + override string getAContentType() { result = "text/html" } + + override HTTP::ResponseWriter getResponseWriter() { none() } + + override HtmlTemplate::TemplateRead getRead() { result = read } + } + + /** + * A render of a template. + */ + abstract class TemplateRender extends DataFlow::Node, TemplateInstantiation::Range { + /** Gets the file that is rendered. */ + abstract File getRenderedFile(); + + /** Gets the variable passed as an argument to the template. */ + abstract VariableWithFields getArgumentVariable(); + + override DataFlow::Node getADataArgument() { result = this.getArgumentVariable().getAUse() } + } + + private IR::EvalInstruction skipImplicitFieldReads(IR::Instruction insn) { + result = insn or + result = skipImplicitFieldReads(insn.(IR::ImplicitFieldReadInstruction).getBase()) + } + + /** A call to `Controller.Render`. */ + private class ControllerRender extends TemplateRender, DataFlow::MethodCallNode { + ControllerRender() { this.getTarget().hasQualifiedName(packagePath(), "Controller", "Render") } + + override DataFlow::Node getTemplateArgument() { none() } + + override File getRenderedFile() { + exists(Type controllerType, string controllerRe, string handlerRe, string pathRe | + controllerType = skipImplicitFieldReads(this.getReceiver().asInstruction()).getResultType() and + controllerRe = "\\Q" + controllerType.getName() + "\\E" and + handlerRe = "\\Q" + this.getEnclosingCallable().getName() + "\\E" and + // find a file named '/views//(.